[llvm] e05ce03 - PEI should be able to use backward walk in replaceFrameIndicesBackward.

Alexander Timofeev via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 15 06:20:44 PST 2022


Author: Alexander Timofeev
Date: 2022-11-15T15:20:25+01:00
New Revision: e05ce03cfa0b36e9b99149e21afcb1fc039df813

URL: https://github.com/llvm/llvm-project/commit/e05ce03cfa0b36e9b99149e21afcb1fc039df813
DIFF: https://github.com/llvm/llvm-project/commit/e05ce03cfa0b36e9b99149e21afcb1fc039df813.diff

LOG: PEI should be able to use backward walk in replaceFrameIndicesBackward.

The backward register scavenger has correct register
liveness information. PEI should leverage the backward register scavenger.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D137574

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/TargetRegisterInfo.h
    llvm/lib/CodeGen/PrologEpilogInserter.cpp
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/lib/Target/AMDGPU/SIRegisterInfo.h
    llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
    llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
    llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
    llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
    llvm/test/CodeGen/AMDGPU/flat-scratch.ll
    llvm/test/CodeGen/AMDGPU/frame-index.mir
    llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
    llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
    llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
    llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
    llvm/test/CodeGen/AMDGPU/scratch-simple.ll
    llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
    llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
    llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
    llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
    llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
    llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
    llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
    llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index d55f88dd50e5..73d49002b752 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -1028,6 +1028,12 @@ class TargetRegisterInfo : public MCRegisterInfo {
     return false;
   }
 
+  /// Process frame indices in reverse block order. This changes the behavior of
+  /// the RegScavenger passed to eliminateFrameIndex. If this returns true,
+  /// targets should call scavengeRegisterBackwards in eliminateFrameIndex. New
+  /// targets should prefer the reverse scavenging behavior.
+  virtual bool supportsBackwardScavenger() const { return false; }
+
   /// This method must be overriden to eliminate abstract frame indices from
   /// instructions which may use them. The instruction referenced by the
   /// iterator contains an MO_FrameIndex operand which must be eliminated by

diff  --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index be0dd7fe4a52..5b80a40a2f45 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -128,8 +128,16 @@ class PEI : public MachineFunctionPass {
   void replaceFrameIndices(MachineFunction &MF);
   void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
                            int &SPAdj);
+  // Frame indices in debug values are encoded in a target independent
+  // way with simply the frame index and offset rather than any
+  // target-specific addressing mode.
   bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
                                    unsigned OpIdx, int SPAdj = 0);
+  // Does the same as replaceFrameIndices, but uses a backward MIR walk and a
+  // backward register scavenger walk. Does not yet support call sequence
+  // processing.
+  void replaceFrameIndicesBackward(MachineBasicBlock *BB, MachineFunction &MF,
+                                   int &SPAdj);
 
   void insertPrologEpilogCode(MachineFunction &MF);
   void insertZeroCallUsedRegs(MachineFunction &MF);
@@ -1438,6 +1446,70 @@ bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
   return false;
 }
 
+void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB,
+                                      MachineFunction &MF, int &SPAdj) {
+  assert(MF.getSubtarget().getRegisterInfo() &&
+         "getRegisterInfo() must be implemented!");
+
+  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+  RS->enterBasicBlockEnd(*BB);
+  
+  for (MachineInstr &MI : make_early_inc_range(reverse(*BB))) {
+    
+    // Register scavenger backward step
+    MachineBasicBlock::iterator Step(MI);
+    for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
+      if (!MI.getOperand(i).isFI())
+        continue;
+
+      if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
+        continue;
+
+      // If this instruction has a FrameIndex operand, we need to
+      // use the target's register info object to eliminate
+      // it.
+
+      // TRI.eliminateFrameIndex may lower the frame index to a sequence of
+      // instructions. It can also remove or change the instruction passed via
+      // the iterator and thereby invalidate it. To handle this we maintain two
+      // iterators: *Step* points to the position up to which the scavenger
+      // should scan in the next iteration to keep liveness information up to
+      // date. *Curr* keeps track of the correct RS->MBBI, i.e. the scan start
+      // point. It points to the currently processed instruction right before
+      // the frame lowering.
+      //
+      // ITERATORS WORK AS FOLLOWS:
+      // *Step* is shifted one step back right before the frame lowering and
+      // one step forward right after it. No matter how many instructions were
+      // inserted, *Step* will be right after the position which is going to be
+      // processed in the next iteration, thus, in the correct position for the
+      // scavenger to go up to.
+      // *Curr* is shifted one step forward right before calling
+      // TRI.eliminateFrameIndex and one step backward after. Thus, we make sure
+      // it points right to the position that is the correct starting point for
+      // the scavenger to scan.
+      MachineBasicBlock::iterator Curr = ++RS->getCurrentPosition();
+
+      // Shift back
+      Step--;
+
+      TRI.eliminateFrameIndex(MI, SPAdj, i, RS);
+      // Restore to unify logic with a shift back that happens at the end of
+      // the outer loop.
+      Step++;
+      RS->skipTo(--Curr);
+    }
+
+    // Shift it to make RS collect reg info up to the current instruction.
+    if (Step != BB->begin())
+      Step--;
+
+    // Update register states.
+    RS->backward(Step);
+  }
+}
+
 void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
                               int &SPAdj) {
   assert(MF.getSubtarget().getRegisterInfo() &&
@@ -1446,6 +1518,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
 
+  if (RS && TRI.supportsBackwardScavenger())
+    return replaceFrameIndicesBackward(BB, MF, SPAdj);
+
   if (RS && FrameIndexEliminationScavenging)
     RS->enterBasicBlock(*BB);
 
@@ -1466,9 +1541,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
       if (!MI.getOperand(i).isFI())
         continue;
 
-      // Frame indices in debug values are encoded in a target independent
-      // way with simply the frame index and offset rather than any
-      // target-specific addressing mode.
       if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
         continue;
 

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 249de0d3892b..2b21e7e5794e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1364,7 +1364,7 @@ void SIRegisterInfo::buildSpillLoadStore(
     // TODO: Clobbering SCC is not necessary for scratch instructions in the
     // entry.
     if (RS) {
-      SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
+      SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
 
       // Piggy back on the liveness scan we just did see if SCC is dead.
       CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
@@ -1385,7 +1385,7 @@ void SIRegisterInfo::buildSpillLoadStore(
       UseVGPROffset = true;
 
       if (RS) {
-        TmpOffsetVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+        TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
       } else {
         assert(LiveRegs);
         for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
@@ -2249,7 +2249,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         // Convert to a swizzled stack address by scaling by the wave size.
         // In an entry function/kernel the offset is already swizzled.
         bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
-        bool LiveSCC = RS->isRegUsed(AMDGPU::SCC);
+        bool LiveSCC =
+            RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
         const TargetRegisterClass *RC = IsSALU && !LiveSCC
                                             ? &AMDGPU::SReg_32RegClass
                                             : &AMDGPU::VGPR_32RegClass;

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 0e260c8016fc..d78d75a97dcc 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -149,6 +149,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
                           MachineBasicBlock &RestoreMBB, Register SGPR,
                           RegScavenger *RS) const;
 
+  bool supportsBackwardScavenger() const override {
+    return true;
+  }
+
   void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS) const override;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
index 54fc4ddd72ff..a607ccb6946a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -155,52 +155,52 @@ define amdgpu_kernel void @kernel_caller_byval() {
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:8
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:72
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_lo offset:8
 ; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16
+; FLATSCR-NEXT:    s_mov_b32 s11, 0
+; FLATSCR-NEXT:    s_mov_b32 s10, 0
+; FLATSCR-NEXT:    s_mov_b32 s9, 0
+; FLATSCR-NEXT:    s_mov_b32 s8, 0
+; FLATSCR-NEXT:    s_mov_b32 s7, 0
+; FLATSCR-NEXT:    s_mov_b32 s6, 0
+; FLATSCR-NEXT:    s_mov_b32 s5, 0
+; FLATSCR-NEXT:    s_mov_b32 s1, 0
+; FLATSCR-NEXT:    s_mov_b32 s0, 0
+; FLATSCR-NEXT:    s_mov_b32 s4, 0
+; FLATSCR-NEXT:    s_mov_b32 s3, 0
+; FLATSCR-NEXT:    s_mov_b32 s2, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:80
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:24
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:88
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:32
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:96
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:40
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:104
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:48
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:112
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:56
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:120
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:64
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:128
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s33 offset:8
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[2:3], off, s33 offset:16
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[4:5], off, s33 offset:24
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[6:7], off, s33 offset:32
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[8:9], off, s33 offset:40
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s33 offset:48
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[12:13], off, s33 offset:56
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s11 offset:24
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s10 offset:32
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s9 offset:40
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s8 offset:48
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s7 offset:56
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s6 offset:64
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s5 offset:72
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s1 offset:80
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s0 offset:88
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s4 offset:96
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s3 offset:104
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s2 offset:112
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_lo offset:120
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:128
+; FLATSCR-NEXT:    s_mov_b32 s40, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s40 offset:8
+; FLATSCR-NEXT:    s_mov_b32 s39, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[2:3], off, s39 offset:16
+; FLATSCR-NEXT:    s_mov_b32 s38, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[4:5], off, s38 offset:24
+; FLATSCR-NEXT:    s_mov_b32 s37, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[6:7], off, s37 offset:32
+; FLATSCR-NEXT:    s_mov_b32 s36, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[8:9], off, s36 offset:40
+; FLATSCR-NEXT:    s_mov_b32 s35, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s35 offset:48
+; FLATSCR-NEXT:    s_mov_b32 s34, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[12:13], off, s34 offset:56
 ; FLATSCR-NEXT:    s_mov_b32 s33, 0
 ; FLATSCR-NEXT:    scratch_load_dwordx2 v[14:15], off, s33 offset:64
 ; FLATSCR-NEXT:    s_movk_i32 s32, 0x50

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
index cfa56f05fac7..1e4438526779 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
@@ -308,10 +308,10 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload
 ; GCN-NEXT:    v_bfe_u32 v0, v6, 1, 6
-; GCN-NEXT:    v_lshrrev_b32_e64 v5, 6, s33
+; GCN-NEXT:    v_lshrrev_b32_e64 v2, 6, s33
 ; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GCN-NEXT:    v_add_u32_e32 v5, 0x100, v5
-; GCN-NEXT:    v_add_u32_e32 v0, v5, v0
+; GCN-NEXT:    v_add_u32_e32 v2, 0x100, v2
+; GCN-NEXT:    v_add_u32_e32 v0, v2, v0
 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v6
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -487,10 +487,10 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload
 ; GCN-NEXT:    v_and_b32_e32 v0, 31, v6
-; GCN-NEXT:    v_lshrrev_b32_e64 v5, 6, s33
+; GCN-NEXT:    v_lshrrev_b32_e64 v2, 6, s33
 ; GCN-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GCN-NEXT:    v_add_u32_e32 v5, 0x100, v5
-; GCN-NEXT:    v_add_u32_e32 v1, v5, v0
+; GCN-NEXT:    v_add_u32_e32 v2, 0x100, v2
+; GCN-NEXT:    v_add_u32_e32 v1, v2, v0
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v16, v20
 ; GCN-NEXT:    v_mov_b32_e32 v17, v21

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index 8d0733ea0037..7d0029e9efa5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -458,9 +458,9 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX9-NEXT:    scratch_load_dword v1, off, s32 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x100
+; GFX9-NEXT:    s_add_i32 vcc_lo, s32, 0x100
 ; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX9-NEXT:    v_add_u32_e32 v1, vcc_hi, v1
+; GFX9-NEXT:    v_add_u32_e32 v1, vcc_lo, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x100
@@ -477,13 +477,13 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    s_add_i32 s0, s32, 0x100
 ; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x100
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
+; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, vcc_lo, v0
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x100
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, vcc_lo, v1
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -585,16 +585,16 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_mov_b32_e32 v0, 15
+; GFX940-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
 ; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX940-NEXT:    s_and_b32 s0, s0, 15
 ; GFX940-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    scratch_store_dword v1, v0, vcc_hi sc0 sc1
+; GFX940-NEXT:    scratch_store_dword v1, v0, vcc_lo sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_mov_b32_e32 v0, s0
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
 ; GFX940-NEXT:    scratch_load_dword v0, v0, vcc_hi sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_endpgm
@@ -613,9 +613,9 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    v_mov_b32_e32 v2, s0
-; GFX11-NEXT:    scratch_store_b32 v0, v1, vcc_lo dlc
+; GFX11-NEXT:    s_movk_i32 s0, 0x4004
+; GFX11-NEXT:    scratch_store_b32 v0, v1, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX11-NEXT:    scratch_load_b32 v0, v2, vcc_lo glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_endpgm
@@ -681,9 +681,9 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_lo sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
@@ -696,13 +696,13 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 15
+; GFX11-NEXT:    s_movk_i32 s0, 0x4004
 ; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-NEXT:    scratch_load_b32 v3, off, off offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX11-NEXT:    scratch_store_b32 v0, v2, vcc_lo dlc
+; GFX11-NEXT:    scratch_store_b32 v0, v2, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX11-NEXT:    scratch_load_b32 v0, v1, vcc_lo offset:124 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_endpgm
@@ -731,9 +731,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX9-NEXT:    scratch_load_dword v1, off, s32 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX9-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX9-NEXT:    v_add_u32_e32 v1, vcc_hi, v1
+; GFX9-NEXT:    v_add_u32_e32 v1, vcc_lo, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
@@ -750,13 +750,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
+; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, vcc_lo, v0
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, vcc_lo, v1
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -771,9 +771,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_lo sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
@@ -787,13 +787,14 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-NEXT:    scratch_load_b32 v3, off, s32 offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX11-NEXT:    scratch_store_b32 v0, v2, vcc_lo dlc
+; GFX11-NEXT:    scratch_store_b32 v0, v2, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX11-NEXT:    scratch_load_b32 v0, v1, vcc_lo glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]

diff  --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
index 1efddc59e045..152e73d1b803 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
@@ -36,6 +36,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr32_restore_clobber_scc
   ; GFX90A: bb.0:
@@ -280,7 +282,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -527,6 +529,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr32_restore_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -772,7 +776,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -1010,6 +1014,7 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_ENDPGM 0, amdgpu_allvgprs
 ...
 
@@ -1048,6 +1053,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr64_restore_clobber_scc
   ; GFX90A: bb.0:
@@ -1293,7 +1300,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -1542,6 +1549,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr64_restore_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -1787,7 +1796,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -2025,6 +2034,7 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_ENDPGM 0, amdgpu_allvgprs
 ...
 
@@ -2065,6 +2075,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr96_restore_clobber_scc
   ; GFX90A: bb.0:
@@ -2311,7 +2323,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -2562,6 +2574,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr96_restore_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -2807,7 +2821,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -3045,6 +3059,7 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_ENDPGM 0, amdgpu_allvgprs
 ...
 
@@ -3081,6 +3096,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr32_save_clobber_scc
   ; GFX90A: bb.0:
@@ -3325,7 +3342,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $agpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -3572,6 +3589,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr32_save_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -3817,7 +3836,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $agpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -4055,6 +4074,7 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
     S_ENDPGM 0, amdgpu_allvgprs
 ...
 
@@ -4092,6 +4112,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr64_save_clobber_scc
   ; GFX90A: bb.0:
@@ -4337,7 +4359,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -4586,6 +4608,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr64_save_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -4831,7 +4855,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -5069,6 +5093,7 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
     S_ENDPGM 0, amdgpu_allvgprs
 ...
 ---
@@ -5107,6 +5132,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr96_save_clobber_scc
   ; GFX90A: bb.0:
@@ -5353,7 +5380,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -5604,6 +5631,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr96_save_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -5849,7 +5878,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -6087,5 +6116,6 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
     S_ENDPGM 0, amdgpu_allvgprs
 ...

diff  --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index 0a234a5c4cb0..44e5a0b0000a 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -495,25 +495,25 @@ define amdgpu_kernel void @vload2_private(i16 addrspace(1)* nocapture readonly %
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
 ; FLATSCR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v2, 0
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    s_mov_b32 s5, 0
+; FLATSCR-NEXT:    s_mov_b32 s4, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLATSCR-NEXT:    global_load_ushort v0, v2, s[0:1]
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_store_short off, v0, vcc_hi offset:4
+; FLATSCR-NEXT:    scratch_store_short off, v0, s5 offset:4
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    global_load_ushort v0, v2, s[0:1] offset:2
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_store_short off, v0, vcc_hi offset:6
+; FLATSCR-NEXT:    scratch_store_short off, v0, s4 offset:6
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    global_load_ushort v0, v2, s[0:1] offset:4
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_store_short off, v0, vcc_hi offset:8
+; FLATSCR-NEXT:    scratch_store_short off, v0, s0 offset:8
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4
 ; FLATSCR-NEXT:    scratch_load_dword v1, off, vcc_hi offset:6
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
@@ -558,29 +558,27 @@ define amdgpu_kernel void @vload2_private(i16 addrspace(1)* nocapture readonly %
 ; FLATSCR_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
 ; FLATSCR_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; FLATSCR_GFX10-NEXT:    v_mov_b32_e32 v2, 0
+; FLATSCR_GFX10-NEXT:    s_mov_b32 s5, 0
+; FLATSCR_GFX10-NEXT:    s_mov_b32 s4, 0
 ; FLATSCR_GFX10-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLATSCR_GFX10-NEXT:    global_load_ushort v0, v2, s[0:1]
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, vcc_lo offset:4
+; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s5 offset:4
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; FLATSCR_GFX10-NEXT:    global_load_ushort v0, v2, s[0:1] offset:2
-; FLATSCR_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; FLATSCR_GFX10-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, vcc_lo offset:6
+; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s4 offset:6
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; FLATSCR_GFX10-NEXT:    global_load_ushort v0, v2, s[0:1] offset:4
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; FLATSCR_GFX10-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR_GFX10-NEXT:    s_mov_b32 s1, 0
+; FLATSCR_GFX10-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, vcc_lo offset:8
+; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s1 offset:8
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; FLATSCR_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; FLATSCR_GFX10-NEXT:    s_mov_b32 vcc_lo, 0
-; FLATSCR_GFX10-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4
-; FLATSCR_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; FLATSCR_GFX10-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR_GFX10-NEXT:    s_clause 0x1
+; FLATSCR_GFX10-NEXT:    scratch_load_dword v0, off, s0 offset:4
 ; FLATSCR_GFX10-NEXT:    scratch_load_dword v1, off, vcc_lo offset:6
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR_GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]

diff  --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index 9edfbefa7fcd..91f3e3581fa2 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -121,16 +121,16 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
 ; FLAT_SCR_OPT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; FLAT_SCR_OPT-NEXT:    s_mov_b32 s104, exec_lo
 ; FLAT_SCR_OPT-NEXT:    s_mov_b32 exec_lo, 3
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 s105, 0
-; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v72, s105
+; FLAT_SCR_OPT-NEXT:    s_mov_b32 s4, 0
+; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v72, s4
 ; FLAT_SCR_OPT-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLAT_SCR_OPT-NEXT:    v_writelane_b32 v72, s2, 0
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 s105, 4
+; FLAT_SCR_OPT-NEXT:    s_mov_b32 s4, 4
 ; FLAT_SCR_OPT-NEXT:    v_writelane_b32 v72, s3, 1
-; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v72, s105 ; 4-byte Folded Spill
+; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v72, s4 ; 4-byte Folded Spill
 ; FLAT_SCR_OPT-NEXT:    s_waitcnt_depctr 0xffe3
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 s105, 0
-; FLAT_SCR_OPT-NEXT:    scratch_load_dword v72, off, s105
+; FLAT_SCR_OPT-NEXT:    s_mov_b32 s4, 0
+; FLAT_SCR_OPT-NEXT:    scratch_load_dword v72, off, s4
 ; FLAT_SCR_OPT-NEXT:    s_waitcnt vmcnt(0)
 ; FLAT_SCR_OPT-NEXT:    s_waitcnt_depctr 0xffe3
 ; FLAT_SCR_OPT-NEXT:    s_mov_b32 exec_lo, s104
@@ -255,16 +255,16 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
 ; FLAT_SCR_ARCH-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s104, exec_lo
 ; FLAT_SCR_ARCH-NEXT:    s_mov_b32 exec_lo, 3
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s105, 0
-; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v72, s105
+; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s4, 0
+; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v72, s4
 ; FLAT_SCR_ARCH-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLAT_SCR_ARCH-NEXT:    v_writelane_b32 v72, s2, 0
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s105, 4
+; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s4, 4
 ; FLAT_SCR_ARCH-NEXT:    v_writelane_b32 v72, s3, 1
-; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v72, s105 ; 4-byte Folded Spill
+; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v72, s4 ; 4-byte Folded Spill
 ; FLAT_SCR_ARCH-NEXT:    s_waitcnt_depctr 0xffe3
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s105, 0
-; FLAT_SCR_ARCH-NEXT:    scratch_load_dword v72, off, s105
+; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s4, 0
+; FLAT_SCR_ARCH-NEXT:    scratch_load_dword v72, off, s4
 ; FLAT_SCR_ARCH-NEXT:    s_waitcnt vmcnt(0)
 ; FLAT_SCR_ARCH-NEXT:    s_waitcnt_depctr 0xffe3
 ; FLAT_SCR_ARCH-NEXT:    s_mov_b32 exec_lo, s104

diff  --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index bb3133d99a15..d00a446ae039 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -21,13 +21,13 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_mov_b32 s1, 0
+; GFX9-NEXT:    s_mov_b32 s0, 0
+; GFX9-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:52
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:36
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:20
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:52
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:36
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20
 ; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -74,6 +74,7 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
 ; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-PAL-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
@@ -86,12 +87,11 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:52
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:36
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:20
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:52
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:36
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
@@ -129,15 +129,12 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:52
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:36
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:52
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:36
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:20
 ; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:4
 ; GFX1010-PAL-NEXT:    s_endpgm
 ;
@@ -971,8 +968,8 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s4, 0
+; GFX9-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    s_mov_b32 s1, s0
@@ -982,13 +979,13 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_mov_b32 s1, 0
+; GFX9-NEXT:    s_mov_b32 s0, 0
+; GFX9-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:260
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:276
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:292
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:260
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:276
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292
 ; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -1038,13 +1035,15 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s4, 0
 ; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-PAL-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
@@ -1053,13 +1052,11 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:260
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:276
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:292
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:260
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:276
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
@@ -1090,9 +1087,9 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX1010-PAL-NEXT:    s_addc_u32 s3, s3, 0
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s4, 0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4 glc dlc
+; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1010-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s2, s0
@@ -1101,16 +1098,13 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1010-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
 ; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:260
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:276
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:260
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:276
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:292
 ; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:308
 ; GFX1010-PAL-NEXT:    s_endpgm
 ;
@@ -1888,13 +1882,13 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-NEXT:    s_add_i32 s0, s32, 0x100
 ; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, vcc_lo
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
@@ -1953,13 +1947,13 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-PAL-NEXT:    s_add_i32 s0, s32, 0x100
 ; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, vcc_lo
-; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-PAL-NEXT:    scratch_load_dword v3, off, s32 glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-PAL-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, v1, off glc dlc
@@ -2015,8 +2009,8 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s4, 0
+; GFX9-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    s_mov_b32 s1, s0
@@ -2026,13 +2020,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_movk_i32 s1, 0x4004
+; GFX9-NEXT:    s_movk_i32 s0, 0x4004
+; GFX9-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX9-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX9-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX9-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX9-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -2053,12 +2047,12 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s3
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX10-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX10-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX10-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX10-NEXT:    s_movk_i32 s2, 0x4004
+; GFX10-NEXT:    s_movk_i32 s1, 0x4004
+; GFX10-NEXT:    s_movk_i32 s0, 0x4004
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
 ; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -2073,12 +2067,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX11-NEXT:    s_mov_b32 s3, s0
 ; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:16
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:32
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX11-NEXT:    s_movk_i32 s2, 0x4004
+; GFX11-NEXT:    s_movk_i32 s1, 0x4004
+; GFX11-NEXT:    s_movk_i32 s0, 0x4004
+; GFX11-NEXT:    s_clause 0x3
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s2
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
 ; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
@@ -2088,13 +2083,15 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s4, 0
 ; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX9-PAL-NEXT:    s_movk_i32 vcc_hi, 0x4004
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-PAL-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
@@ -2103,13 +2100,11 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-PAL-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX9-PAL-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX9-PAL-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX9-PAL-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX9-PAL-NEXT:    s_movk_i32 s1, 0x4004
+; GFX9-PAL-NEXT:    s_movk_i32 s0, 0x4004
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
@@ -2123,13 +2118,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX940-NEXT:    s_mov_b32 s3, s0
 ; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
 ; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    s_movk_i32 s1, 0x4004
+; GFX940-NEXT:    s_movk_i32 s0, 0x4004
+; GFX940-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX940-NEXT:    s_endpgm
 ;
@@ -2144,9 +2139,9 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX1010-PAL-NEXT:    s_addc_u32 s3, s3, 0
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s4, 0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4 glc dlc
+; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1010-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s2, s0
@@ -2155,16 +2150,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1010-PAL-NEXT:    s_movk_i32 s2, 0x4004
+; GFX1010-PAL-NEXT:    s_movk_i32 s1, 0x4004
+; GFX1010-PAL-NEXT:    s_movk_i32 s0, 0x4004
 ; GFX1010-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
 ; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
 ; GFX1010-PAL-NEXT:    s_endpgm
 ;
@@ -2190,12 +2182,12 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX1030-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX1030-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX1030-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX1030-PAL-NEXT:    s_movk_i32 s2, 0x4004
+; GFX1030-PAL-NEXT:    s_movk_i32 s1, 0x4004
+; GFX1030-PAL-NEXT:    s_movk_i32 s0, 0x4004
+; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
+; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
+; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
 ; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
 ; GFX1030-PAL-NEXT:    s_endpgm
 ;
@@ -2210,12 +2202,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX11-PAL-NEXT:    s_mov_b32 s3, s0
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo
-; GFX11-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:16
-; GFX11-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:32
-; GFX11-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX11-PAL-NEXT:    s_movk_i32 s2, 0x4004
+; GFX11-PAL-NEXT:    s_movk_i32 s1, 0x4004
+; GFX11-PAL-NEXT:    s_movk_i32 s0, 0x4004
+; GFX11-PAL-NEXT:    s_clause 0x3
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s2
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
 ; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
 ; GFX11-PAL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-PAL-NEXT:    s_endpgm
@@ -2242,13 +2235,13 @@ define void @zero_init_large_offset_foo() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX9-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX9-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -2260,7 +2253,6 @@ define void @zero_init_large_offset_foo() {
 ; GFX10-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_mov_b32 s0, 0
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX10-NEXT:    s_mov_b32 s1, s0
 ; GFX10-NEXT:    s_mov_b32 s2, s0
 ; GFX10-NEXT:    s_mov_b32 s3, s0
@@ -2268,12 +2260,13 @@ define void @zero_init_large_offset_foo() {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s3
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
+; GFX10-NEXT:    s_add_i32 s2, s32, 0x4004
+; GFX10-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX10-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
 ; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -2285,18 +2278,20 @@ define void @zero_init_large_offset_foo() {
 ; GFX11-NEXT:    scratch_load_b32 v0, off, s32 offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_mov_b32 s0, 0
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_mov_b32 s1, s0
 ; GFX11-NEXT:    s_mov_b32 s2, s0
 ; GFX11-NEXT:    s_mov_b32 s3, s0
 ; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:16
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:32
+; GFX11-NEXT:    s_add_i32 s2, s32, 0x4004
+; GFX11-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX11-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-NEXT:    s_clause 0x3
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s2
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
 ; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -2314,13 +2309,13 @@ define void @zero_init_large_offset_foo() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-PAL-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX9-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX9-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
@@ -2336,69 +2331,41 @@ define void @zero_init_large_offset_foo() {
 ; GFX940-NEXT:    s_mov_b32 s3, s0
 ; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
 ; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX940-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1010-PAL-LABEL: zero_init_large_offset_foo:
-; GFX1010-PAL:       ; %bb.0:
-; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc dlc
-; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX1010-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1010-PAL-NEXT:    s_mov_b32 s1, s0
-; GFX1010-PAL-NEXT:    s_mov_b32 s2, s0
-; GFX1010-PAL-NEXT:    s_mov_b32 s3, s0
-; GFX1010-PAL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX1010-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
-; GFX1010-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1010-PAL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX1030-PAL-LABEL: zero_init_large_offset_foo:
-; GFX1030-PAL:       ; %bb.0:
-; GFX1030-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1030-PAL-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc dlc
-; GFX1030-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX1030-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX1030-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1030-PAL-NEXT:    s_mov_b32 s1, s0
-; GFX1030-PAL-NEXT:    s_mov_b32 s2, s0
-; GFX1030-PAL-NEXT:    s_mov_b32 s3, s0
-; GFX1030-PAL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX1030-PAL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1030-PAL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX1030-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX1030-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX1030-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX1030-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
-; GFX1030-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1030-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-PAL-LABEL: zero_init_large_offset_foo:
+; GFX10-PAL:       ; %bb.0:
+; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-PAL-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc dlc
+; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX10-PAL-NEXT:    s_mov_b32 s1, s0
+; GFX10-PAL-NEXT:    s_mov_b32 s2, s0
+; GFX10-PAL-NEXT:    s_mov_b32 s3, s0
+; GFX10-PAL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-PAL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-PAL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX10-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-PAL-NEXT:    s_add_i32 s2, s32, 0x4004
+; GFX10-PAL-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX10-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
+; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-PAL-LABEL: zero_init_large_offset_foo:
 ; GFX11-PAL:       ; %bb.0:
@@ -2407,18 +2374,20 @@ define void @zero_init_large_offset_foo() {
 ; GFX11-PAL-NEXT:    scratch_load_b32 v0, off, s32 offset:4 glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-PAL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX11-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX11-PAL-NEXT:    s_mov_b32 s3, s0
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:16
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:32
+; GFX11-PAL-NEXT:    s_add_i32 s2, s32, 0x4004
+; GFX11-PAL-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX11-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-PAL-NEXT:    s_clause 0x3
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s2
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
 ; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    s_setpc_b64 s[30:31]
@@ -2986,13 +2955,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, vcc_lo
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
@@ -3005,13 +2974,14 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-NEXT:    scratch_load_b32 v3, off, s32 offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX11-NEXT:    scratch_store_b32 v0, v2, vcc_lo dlc
+; GFX11-NEXT:    scratch_store_b32 v0, v2, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX11-NEXT:    scratch_load_b32 v0, v1, vcc_lo glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -3040,9 +3010,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_lo sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
@@ -3055,13 +3025,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, vcc_lo
-; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-PAL-NEXT:    scratch_load_dword v3, off, s32 offset:4 glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-PAL-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, v1, off glc dlc
@@ -3074,13 +3044,14 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
 ; GFX11-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-PAL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-PAL-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-PAL-NEXT:    scratch_load_b32 v3, off, s32 offset:4 glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-PAL-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX11-PAL-NEXT:    scratch_store_b32 v0, v2, vcc_lo dlc
+; GFX11-PAL-NEXT:    scratch_store_b32 v0, v2, s0 dlc
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX11-PAL-NEXT:    scratch_load_b32 v0, v1, vcc_lo glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-PAL-NEXT:    s_setpc_b64 s[30:31]
@@ -4138,8 +4109,8 @@ define amdgpu_ps void @large_offset() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v3, v0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:3024
+; GFX9-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:3024
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-NEXT:    scratch_load_dwordx4 v[0:3], off, vcc_hi offset:3024 glc
@@ -4213,8 +4184,8 @@ define amdgpu_ps void @large_offset() {
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s0
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:3024
+; GFX9-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:3024
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-PAL-NEXT:    scratch_load_dwordx4 v[0:3], off, vcc_hi offset:3024 glc

diff  --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index 82bd45d71da1..bdf7364b74be 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -89,8 +89,8 @@ body:             |
     ; GCN-LABEL: name: func_add_constant_to_fi_uniform_SCC_clobber_i32
     ; GCN: liveins: $sgpr30_sgpr31
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $vcc_hi = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc
-    ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $vcc_hi, 4, implicit-def $scc
+    ; GCN-NEXT: $vcc_lo = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc
+    ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $vcc_lo, 4, implicit-def $scc
     ; GCN-NEXT: renamable $sgpr5 = S_ADDC_U32 $sgpr4, 1234567, implicit-def $scc, implicit $scc
     ; GCN-NEXT: $vcc_hi = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
     ; GCN-NEXT: $vcc_hi = S_ADD_I32 killed $vcc_hi, 8, implicit-def $scc
@@ -184,3 +184,30 @@ body:             |
     S_SETPC_B64_return killed renamable $sgpr30_sgpr31
 
 ...
+---
+name: func_frame_idx_at_the_end_of_bb
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, offset: 0, size: 8, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 0, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+
+body:             |
+  bb.0:
+    liveins: $vgpr31
+
+    ; GCN-LABEL: name: func_frame_idx_at_the_end_of_bb
+    ; GCN: liveins: $vgpr31
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 1023, killed $vgpr31, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+    ; GCN-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr1, killed $vgpr0, implicit-def dead $vcc, implicit $exec
+    renamable $vgpr0 = V_AND_B32_e32 1023, killed $vgpr31, implicit $exec
+    renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+    renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
+...
+

diff  --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
index 450cae8a4a38..55cd662a0bd8 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -42,14 +42,14 @@ define void @callee_with_stack_and_call() #0 {
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b32 s33, s32
 ; NO-SPILL-TO-VGPR-NEXT:    s_addk_i32 s32, 0x800
-; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[8:9], exec
+; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[10:11], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
-; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:16
-; NO-SPILL-TO-VGPR-NEXT:    v_writelane_b32 v1, s30, 0
-; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT:    v_writelane_b32 v2, s30, 0
+; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    s_waitcnt vmcnt(0)
-; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[8:9]
+; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[10:11]
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[8:9], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:16
@@ -65,15 +65,15 @@ define void @callee_with_stack_and_call() #0 {
 ; NO-SPILL-TO-VGPR-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
 ; NO-SPILL-TO-VGPR-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
 ; NO-SPILL-TO-VGPR-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[4:5], exec
+; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[6:7], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
-; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:16
-; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; NO-SPILL-TO-VGPR-NEXT:    s_waitcnt vmcnt(0)
-; NO-SPILL-TO-VGPR-NEXT:    v_readlane_b32 s31, v1, 0
-; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT:    v_readlane_b32 s31, v2, 0
+; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    s_waitcnt vmcnt(0)
-; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[4:5]
+; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[6:7]
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[4:5], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:16

diff  --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 776d4ed37e4c..16f513dba3c5 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -236,29 +236,29 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:    v_or_b32_e32 v1, 0x12cc, v0
 ; MUBUF-NEXT:    v_or_b32_e32 v0, 0x12c8, v0
-; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
+; MUBUF-NEXT:    v_mov_b32_e32 v18, 0x4000
+; MUBUF-NEXT:    v_mov_b32_e32 v17, 0x4000
+; MUBUF-NEXT:    v_mov_b32_e32 v16, 0x4000
 ; MUBUF-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    v_mov_b32_e32 v12, 0
+; MUBUF-NEXT:    v_mov_b32_e32 v15, 0x4000
 ; MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    buffer_load_dword v8, v13, s[0:3], 0 offen glc
+; MUBUF-NEXT:    v_mov_b32_e32 v14, 0x4000
+; MUBUF-NEXT:    buffer_load_dword v8, v18, s[0:3], 0 offen glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
-; MUBUF-NEXT:    buffer_load_dword v9, v13, s[0:3], 0 offen offset:4 glc
+; MUBUF-NEXT:    buffer_load_dword v9, v17, s[0:3], 0 offen offset:4 glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
-; MUBUF-NEXT:    buffer_load_dword v2, v13, s[0:3], 0 offen offset:8 glc
+; MUBUF-NEXT:    buffer_load_dword v2, v16, s[0:3], 0 offen offset:8 glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
-; MUBUF-NEXT:    buffer_load_dword v3, v13, s[0:3], 0 offen offset:12 glc
+; MUBUF-NEXT:    buffer_load_dword v3, v15, s[0:3], 0 offen offset:12 glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
-; MUBUF-NEXT:    buffer_load_dword v10, v13, s[0:3], 0 offen offset:16 glc
+; MUBUF-NEXT:    buffer_load_dword v10, v14, s[0:3], 0 offen offset:16 glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
 ; MUBUF-NEXT:    buffer_load_dword v11, v13, s[0:3], 0 offen offset:20 glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    v_mov_b32_e32 v12, 0
 ; MUBUF-NEXT:    v_add_co_u32_e32 v2, vcc, v0, v2
 ; MUBUF-NEXT:    v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
 ; MUBUF-NEXT:    v_add_co_u32_e32 v0, vcc, v7, v8
@@ -297,10 +297,10 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 offset:704 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s3, 0x2000
 ; FLATSCR-NEXT:    s_movk_i32 s2, 0x2000
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s2 offset:16 glc
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s3 offset:16 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    s_movk_i32 s2, 0x2000
 ; FLATSCR-NEXT:    scratch_load_dwordx4 v[4:7], off, s2 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    v_mov_b32_e32 v12, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
index f0d0abd31ed5..aa4428f3da4e 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
@@ -29,8 +29,8 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, $sgpr4, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT:   $sgpr5 = S_MOV_B32 524288
-  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 8192, addrspace 5)
+  ; CHECK-NEXT:   $sgpr4 = S_MOV_B32 524288
+  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 8192, addrspace 5)
   ; CHECK-NEXT:   S_BRANCH %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:

diff  --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
index e61e224d3252..faf4c16a64ff 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
@@ -20,84 +20,109 @@ machineFunctionInfo:
   stackPtrOffsetReg:  $sgpr32
 
 body:             |
+  ; GFX8-LABEL: name: pei_scavenge_vgpr_spill
+  ; GFX8: bb.0:
+  ; GFX8-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX8-NEXT: {{  $}}
+  ; GFX8-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX8-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
+  ; GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX8-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
+  ; GFX8-NEXT:   $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+  ; GFX8-NEXT:   $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+  ; GFX8-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+  ; GFX8-NEXT:   $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+  ; GFX8-NEXT:   $vcc_lo = S_MOV_B32 8192
+  ; GFX8-NEXT:   $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr0, 0, implicit $exec
+  ; GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; GFX8-NEXT:   $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+  ; GFX8-NEXT:   $vcc_lo = S_MOV_B32 16384
+  ; GFX8-NEXT:   $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
+  ; GFX8-NEXT:   $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec
+  ; GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; GFX8-NEXT:   S_BRANCH %bb.1
+  ; GFX8-NEXT: {{  $}}
+  ; GFX8-NEXT: bb.1:
+  ; GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX8-NEXT: {{  $}}
+  ; GFX8-NEXT:   $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+  ; GFX8-NEXT:   $sgpr33 = V_READLANE_B32 $vgpr2, 0
+  ; GFX8-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX8-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
+  ; GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX8-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
+  ; GFX9-LABEL: name: pei_scavenge_vgpr_spill
+  ; GFX9: bb.0:
+  ; GFX9-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX9-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
+  ; GFX9-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; GFX9-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX9-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
+  ; GFX9-NEXT:   $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+  ; GFX9-NEXT:   $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+  ; GFX9-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+  ; GFX9-NEXT:   $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+  ; GFX9-NEXT:   $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
+  ; GFX9-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; GFX9-NEXT:   $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+  ; GFX9-NEXT:   $vgpr2 = V_ADD_U32_e32 16384, killed $vgpr2, implicit $exec
+  ; GFX9-NEXT:   $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec
+  ; GFX9-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; GFX9-NEXT:   S_BRANCH %bb.1
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT: bb.1:
+  ; GFX9-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+  ; GFX9-NEXT:   $sgpr33 = V_READLANE_B32 $vgpr2, 0
+  ; GFX9-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX9-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
+  ; GFX9-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; GFX9-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX9-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
+  ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
+  ; GFX9-FLATSCR: bb.0:
+  ; GFX9-FLATSCR-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX9-FLATSCR-NEXT: {{  $}}
+  ; GFX9-FLATSCR-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX9-FLATSCR-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
+  ; GFX9-FLATSCR-NEXT:   $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+  ; GFX9-FLATSCR-NEXT:   $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+  ; GFX9-FLATSCR-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+  ; GFX9-FLATSCR-NEXT:   $vcc_lo = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
+  ; GFX9-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec
+  ; GFX9-FLATSCR-NEXT:   $vcc_hi = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
+  ; GFX9-FLATSCR-NEXT:   $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec
+  ; GFX9-FLATSCR-NEXT:   S_BRANCH %bb.1
+  ; GFX9-FLATSCR-NEXT: {{  $}}
+  ; GFX9-FLATSCR-NEXT: bb.1:
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX9-FLATSCR-NEXT: {{  $}}
+  ; GFX9-FLATSCR-NEXT:   $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+  ; GFX9-FLATSCR-NEXT:   $sgpr33 = V_READLANE_B32 $vgpr2, 0
+  ; GFX9-FLATSCR-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX9-FLATSCR-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc
+  ; GFX9-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX9-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   bb.0:
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
 
-    ; GFX8-LABEL: name: pei_scavenge_vgpr_spill
-    ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr2
-    ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX8-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
-    ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
-    ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
-    ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
-    ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
-    ; GFX8-NEXT: $vcc_lo = S_MOV_B32 8192
-    ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr0, 0, implicit $exec
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX8-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
-    ; GFX8-NEXT: $vcc_lo = S_MOV_B32 16384
-    ; GFX8-NEXT: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec
-    ; GFX8-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
-    ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
-    ; GFX8-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
-    ; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
-    ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX8-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX8-NEXT: S_ENDPGM 0, amdgpu_allvgprs
-    ; GFX9-LABEL: name: pei_scavenge_vgpr_spill
-    ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr2
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
-    ; GFX9-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
-    ; GFX9-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
-    ; GFX9-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
-    ; GFX9-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
-    ; GFX9-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX9-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
-    ; GFX9-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec
-    ; GFX9-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
-    ; GFX9-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
-    ; GFX9-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
-    ; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
-    ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX9-NEXT: S_ENDPGM 0, amdgpu_allvgprs
-    ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
-    ; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
-    ; GFX9-FLATSCR-NEXT: {{  $}}
-    ; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
-    ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
-    ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
-    ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec
-    ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
-    ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec
-    ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
-    ; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX9-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs
     $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
     $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_ENDPGM 0, amdgpu_allvgprs
 ...

diff  --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
index 9d8397f640ac..b87790023627 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI,SIVI,MUBUF %s
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx803 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,VI,SIVI,MUBUF %s
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9_10,MUBUF,GFX9-MUBUF,GFX9_10-MUBUF %s
@@ -12,303 +13,5939 @@
 ; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD0
 ; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD1
 
-; This used to fail due to a v_add_i32 instruction with an illegal immediate
-; operand that was created during Local Stack Slot Allocation. Test case derived
-; from https://bugs.freedesktop.org/show_bug.cgi?id=96602
-;
-; GCN-LABEL: {{^}}ps_main:
-
-; GFX9-FLATSCR-DAG: s_add_u32 flat_scratch_lo, s0, s2
-; GFX9-FLATSCR-DAG: s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-FLATSCR-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s2
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-FLATSCR-PAL-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
-; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
-; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-
-; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
-; MUBUF-DAG: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
-; MUBUF-DAG: s_mov_b32 s2, -1
-; SI-DAG: s_mov_b32 s3, 0xe8f000
-; VI-DAG: s_mov_b32 s3, 0xe80000
-; GFX9-MUBUF-DAG: s_mov_b32 s3, 0xe00000
-; GFX10_W32-MUBUF-DAG: s_mov_b32 s3, 0x31c16000
-; GFX10_W64-MUBUF-DAG: s_mov_b32 s3, 0x31e16000
-
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
 
-; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
-; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset:
-
-; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:
-
-; MUBUF-DAG:     v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
-; MUBUF-DAG:     v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
-; GFX10-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
-; GFX10-FLATSCR-PAL: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
-; GCN-NOT: s_mov_b32 s0
-
-; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[HI_OFF:v[0-9]+]],{{.*}} 0x280, [[CLAMP_IDX]]
-; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[LO_OFF:v[0-9]+]],{{.*}} {{v2|0x80}}, [[CLAMP_IDX]]
-
-; MUBUF: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; MUBUF: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, [[LO_OFF]], off
 define amdgpu_ps float @ps_main(i32 %idx) {
+; SI-LABEL: ps_main:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_mov_b32 s3, 0xe8f000
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    s_add_u32 s0, s0, s4
+; SI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0x80
+; SI-NEXT:    s_addc_u32 s1, s1, 0
+; SI-NEXT:    v_add_i32_e32 v1, vcc, 0x280, v0
+; SI-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; SI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; SI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; SI-NEXT:    s_waitcnt expcnt(1)
+; SI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; SI-NEXT:    s_waitcnt expcnt(3)
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; SI-NEXT:    s_waitcnt expcnt(2)
+; SI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; SI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; SI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; SI-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_add_f32_e32 v0, v0, v1
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    ; return to shader part epilog
+;
+; VI-LABEL: ps_main:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_mov_b32 s4, s0
+; VI-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; VI-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_mov_b32 s3, 0xe80000
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_add_u32 s0, s0, s4
+; VI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0x80
+; VI-NEXT:    s_addc_u32 s1, s1, 0
+; VI-NEXT:    v_add_u32_e32 v1, vcc, 0x280, v0
+; VI-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; VI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; VI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; VI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; VI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; VI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; VI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; VI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; VI-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v0, v0, v1
+; VI-NEXT:    ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: ps_main:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_mov_b32 s4, s0
+; GFX9-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s3, 0xe00000
+; GFX9-MUBUF-NEXT:    s_add_u32 s0, s0, s4
+; GFX9-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v1, 0x280, v0
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v0, 0x80, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT:    s_nop 0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; GFX9-MUBUF-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: ps_main:
+; GFX10_W32-MUBUF:       ; %bb.0:
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s4, s0
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s3, 0x31c16000
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT:    s_add_u32 s0, s0, s4
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:448
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:444
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:440
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:436
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:432
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:428
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:424
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:420
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:416
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:412
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:408
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:404
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:400
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:396
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:392
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:388
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:384
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:380
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:376
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:368
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:364
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:360
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:356
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:352
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:328
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:324
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:960
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:956
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:952
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:948
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:944
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:940
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:936
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:932
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:928
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:924
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:920
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:916
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:912
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:908
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:900
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:892
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:888
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:884
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:880
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:876
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:872
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:868
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:864
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:860
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:856
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:852
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:848
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:844
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:840
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:836
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W32-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: ps_main:
+; GFX10_W64-MUBUF:       ; %bb.0:
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s4, s0
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s3, 0x31e16000
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT:    s_add_u32 s0, s0, s4
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:448
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:444
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:440
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:436
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:432
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:428
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:424
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:420
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:416
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:412
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:408
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:404
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:400
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:396
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:392
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:388
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:384
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:380
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:376
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:368
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:364
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:360
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:356
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:352
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:328
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:324
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:960
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:956
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:952
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:948
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:944
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:940
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:936
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:932
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:928
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:924
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:920
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:916
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:912
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:908
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:900
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:892
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:888
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:884
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:880
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:876
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:872
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:868
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:864
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:860
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:856
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:852
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:848
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:844
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:840
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:836
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W64-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: ps_main:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s2
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:416
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s2 offset:336
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:880
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s2 offset:864
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: ps_main:
+; GFX10-FLATSCR:       ; %bb.0:
+; GFX10-FLATSCR-NEXT:    s_add_u32 s0, s0, s2
+; GFX10-FLATSCR-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: ps_main:
+; GFX9-FLATSCR-PAL:       ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, s0
+; GFX9-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX9-FLATSCR-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s0
+; GFX9-FLATSCR-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:416
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s2 offset:336
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:880
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s2 offset:864
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: ps_main:
+; GFX10-FLATSCR-PAL:       ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX10-FLATSCR-PAL-NEXT:    s_mov_b32 s2, s0
+; GFX10-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX10-FLATSCR-PAL-NEXT:    s_add_u32 s2, s2, s0
+; GFX10-FLATSCR-PAL-NEXT:    s_addc_u32 s3, s3, 0
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-PAL-NEXT:    ; return to shader part epilog
   %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
   %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
   %r = fadd float %v1, %v2
   ret float %r
 }
 
-; GCN-LABEL: {{^}}vs_main:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s2
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
-; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
-; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-
-; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
-; GCN-NOT: s_mov_b32 s0
-
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-
-; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
-; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset:
-
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-
 define amdgpu_vs float @vs_main(i32 %idx) {
+; SI-LABEL: vs_main:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_mov_b32 s3, 0xe8f000
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    s_add_u32 s0, s0, s4
+; SI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0x80
+; SI-NEXT:    s_addc_u32 s1, s1, 0
+; SI-NEXT:    v_add_i32_e32 v1, vcc, 0x280, v0
+; SI-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; SI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; SI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; SI-NEXT:    s_waitcnt expcnt(1)
+; SI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; SI-NEXT:    s_waitcnt expcnt(3)
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; SI-NEXT:    s_waitcnt expcnt(2)
+; SI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; SI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; SI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; SI-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_add_f32_e32 v0, v0, v1
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    ; return to shader part epilog
+;
+; VI-LABEL: vs_main:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_mov_b32 s4, s0
+; VI-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; VI-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_mov_b32 s3, 0xe80000
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_add_u32 s0, s0, s4
+; VI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0x80
+; VI-NEXT:    s_addc_u32 s1, s1, 0
+; VI-NEXT:    v_add_u32_e32 v1, vcc, 0x280, v0
+; VI-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; VI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; VI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; VI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; VI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; VI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; VI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; VI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; VI-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v0, v0, v1
+; VI-NEXT:    ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: vs_main:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_mov_b32 s4, s0
+; GFX9-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s3, 0xe00000
+; GFX9-MUBUF-NEXT:    s_add_u32 s0, s0, s4
+; GFX9-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v1, 0x280, v0
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v0, 0x80, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT:    s_nop 0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; GFX9-MUBUF-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: vs_main:
+; GFX10_W32-MUBUF:       ; %bb.0:
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s4, s0
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s3, 0x31c16000
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT:    s_add_u32 s0, s0, s4
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:448
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:444
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:440
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:436
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:432
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:428
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:424
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:420
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:416
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:412
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:408
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:404
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:400
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:396
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:392
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:388
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:384
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:380
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:376
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:368
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:364
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:360
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:356
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:352
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:328
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:324
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:960
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:956
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:952
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:948
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:944
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:940
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:936
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:932
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:928
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:924
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:920
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:916
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:912
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:908
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:900
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:892
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:888
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:884
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:880
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:876
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:872
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:868
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:864
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:860
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:856
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:852
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:848
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:844
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:840
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:836
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W32-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: vs_main:
+; GFX10_W64-MUBUF:       ; %bb.0:
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s4, s0
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s3, 0x31e16000
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT:    s_add_u32 s0, s0, s4
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:448
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:444
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:440
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:436
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:432
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:428
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:424
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:420
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:416
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:412
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:408
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:404
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:400
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:396
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:392
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:388
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:384
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:380
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:376
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:368
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:364
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:360
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:356
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:352
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:328
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:324
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:960
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:956
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:952
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:948
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:944
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:940
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:936
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:932
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:928
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:924
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:920
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:916
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:912
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:908
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:900
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:892
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:888
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:884
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:880
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:876
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:872
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:868
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:864
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:860
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:856
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:852
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:848
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:844
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:840
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:836
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W64-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: vs_main:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s2
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:416
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s2 offset:336
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:880
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s2 offset:864
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: vs_main:
+; GFX10-FLATSCR:       ; %bb.0:
+; GFX10-FLATSCR-NEXT:    s_add_u32 s0, s0, s2
+; GFX10-FLATSCR-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: vs_main:
+; GFX9-FLATSCR-PAL:       ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, s0
+; GFX9-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX9-FLATSCR-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s0
+; GFX9-FLATSCR-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:416
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s2 offset:336
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:880
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s2 offset:864
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: vs_main:
+; GFX10-FLATSCR-PAL:       ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX10-FLATSCR-PAL-NEXT:    s_mov_b32 s2, s0
+; GFX10-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX10-FLATSCR-PAL-NEXT:    s_add_u32 s2, s2, s0
+; GFX10-FLATSCR-PAL-NEXT:    s_addc_u32 s3, s3, 0
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-PAL-NEXT:    ; return to shader part epilog
   %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
   %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
   %r = fadd float %v1, %v2
   ret float %r
 }
 
-; GCN-LABEL: {{^}}cs_main:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s2
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x10
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
-; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x10
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
-; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-
-; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
-
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-
-; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
 define amdgpu_cs float @cs_main(i32 %idx) {
+; SI-LABEL: cs_main:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_mov_b32 s3, 0xe8f000
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    s_add_u32 s0, s0, s4
+; SI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0x80
+; SI-NEXT:    s_addc_u32 s1, s1, 0
+; SI-NEXT:    v_add_i32_e32 v1, vcc, 0x280, v0
+; SI-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; SI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; SI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; SI-NEXT:    s_waitcnt expcnt(1)
+; SI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; SI-NEXT:    s_waitcnt expcnt(3)
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; SI-NEXT:    s_waitcnt expcnt(2)
+; SI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; SI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; SI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; SI-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_add_f32_e32 v0, v0, v1
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    ; return to shader part epilog
+;
+; VI-LABEL: cs_main:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_mov_b32 s4, s0
+; VI-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; VI-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_mov_b32 s3, 0xe80000
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_add_u32 s0, s0, s4
+; VI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0x80
+; VI-NEXT:    s_addc_u32 s1, s1, 0
+; VI-NEXT:    v_add_u32_e32 v1, vcc, 0x280, v0
+; VI-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; VI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; VI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; VI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; VI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; VI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; VI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; VI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; VI-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v0, v0, v1
+; VI-NEXT:    ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: cs_main:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_mov_b32 s4, s0
+; GFX9-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s3, 0xe00000
+; GFX9-MUBUF-NEXT:    s_add_u32 s0, s0, s4
+; GFX9-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v1, 0x280, v0
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v0, 0x80, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT:    s_nop 0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; GFX9-MUBUF-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: cs_main:
+; GFX10_W32-MUBUF:       ; %bb.0:
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s4, s0
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s3, 0x31c16000
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT:    s_add_u32 s0, s0, s4
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:448
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:444
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:440
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:436
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:432
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:428
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:424
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:420
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:416
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:412
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:408
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:404
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:400
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:396
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:392
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:388
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:384
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:380
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:376
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:368
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:364
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:360
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:356
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:352
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:328
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:324
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:960
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:956
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:952
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:948
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:944
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:940
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:936
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:932
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:928
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:924
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:920
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:916
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:912
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:908
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:900
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:892
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:888
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:884
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:880
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:876
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:872
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:868
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:864
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:860
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:856
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:852
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:848
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:844
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:840
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:836
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W32-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: cs_main:
+; GFX10_W64-MUBUF:       ; %bb.0:
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s4, s0
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s3, 0x31e16000
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT:    s_add_u32 s0, s0, s4
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:448
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:444
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:440
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:436
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:432
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:428
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:424
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:420
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:416
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:412
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:408
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:404
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:400
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:396
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:392
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:388
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:384
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:380
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:376
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:368
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:364
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:360
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:356
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:352
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:328
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:324
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:960
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:956
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:952
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:948
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:944
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:940
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:936
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:932
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:928
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:924
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:920
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:916
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:912
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:908
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:900
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:892
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:888
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:884
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:880
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:876
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:872
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:868
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:864
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:860
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:856
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:852
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:848
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:844
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:840
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:836
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W64-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: cs_main:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s2
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:416
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s2 offset:336
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:880
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s2 offset:864
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: cs_main:
+; GFX10-FLATSCR:       ; %bb.0:
+; GFX10-FLATSCR-NEXT:    s_add_u32 s0, s0, s2
+; GFX10-FLATSCR-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: cs_main:
+; GFX9-FLATSCR-PAL:       ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, s0
+; GFX9-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x10
+; GFX9-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX9-FLATSCR-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s0
+; GFX9-FLATSCR-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:416
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s2 offset:336
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:880
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s2 offset:864
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: cs_main:
+; GFX10-FLATSCR-PAL:       ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX10-FLATSCR-PAL-NEXT:    s_mov_b32 s2, s0
+; GFX10-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x10
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX10-FLATSCR-PAL-NEXT:    s_add_u32 s2, s2, s0
+; GFX10-FLATSCR-PAL-NEXT:    s_addc_u32 s3, s3, 0
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-PAL-NEXT:    ; return to shader part epilog
   %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
   %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
   %r = fadd float %v1, %v2
   ret float %r
 }
 
-; GCN-LABEL: {{^}}hs_main:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; SIVI: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
-; SIVI-NOT: s_mov_b32 s0
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
-; GFX9_10-NOT:   s_mov_b32 s5
-; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
 define amdgpu_hs float @hs_main(i32 %idx) {
+; SI-LABEL: hs_main:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_mov_b32 s3, 0xe8f000
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    s_add_u32 s0, s0, s4
+; SI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0x80
+; SI-NEXT:    s_addc_u32 s1, s1, 0
+; SI-NEXT:    v_add_i32_e32 v1, vcc, 0x280, v0
+; SI-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; SI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; SI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; SI-NEXT:    s_waitcnt expcnt(1)
+; SI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; SI-NEXT:    s_waitcnt expcnt(3)
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; SI-NEXT:    s_waitcnt expcnt(2)
+; SI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; SI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; SI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; SI-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_add_f32_e32 v0, v0, v1
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    ; return to shader part epilog
+;
+; VI-LABEL: hs_main:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_mov_b32 s4, s0
+; VI-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; VI-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_mov_b32 s3, 0xe80000
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_add_u32 s0, s0, s4
+; VI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0x80
+; VI-NEXT:    s_addc_u32 s1, s1, 0
+; VI-NEXT:    v_add_u32_e32 v1, vcc, 0x280, v0
+; VI-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; VI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; VI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; VI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; VI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; VI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; VI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; VI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; VI-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v0, v0, v1
+; VI-NEXT:    ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: hs_main:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s3, 0xe00000
+; GFX9-MUBUF-NEXT:    s_add_u32 s0, s0, s5
+; GFX9-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v1, 0x280, v0
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v0, 0x80, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT:    s_nop 0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; GFX9-MUBUF-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: hs_main:
+; GFX10_W32-MUBUF:       ; %bb.0:
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s3, 0x31c16000
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT:    s_add_u32 s0, s0, s5
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:448
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:444
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:440
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:436
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:432
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:428
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:424
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:420
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:416
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:412
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:408
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:404
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:400
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:396
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:392
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:388
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:384
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:380
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:376
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:368
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:364
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:360
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:356
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:352
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:328
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:324
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:960
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:956
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:952
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:948
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:944
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:940
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:936
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:932
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:928
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:924
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:920
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:916
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:912
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:908
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:900
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:892
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:888
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:884
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:880
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:876
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:872
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:868
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:864
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:860
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:856
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:852
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:848
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:844
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:840
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:836
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W32-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: hs_main:
+; GFX10_W64-MUBUF:       ; %bb.0:
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s3, 0x31e16000
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT:    s_add_u32 s0, s0, s5
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:448
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:444
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:440
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:436
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:432
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:428
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:424
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:420
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:416
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:412
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:408
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:404
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:400
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:396
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:392
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:388
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:384
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:380
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:376
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:368
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:364
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:360
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:356
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:352
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:328
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:324
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:960
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:956
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:952
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:948
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:944
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:940
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:936
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:932
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:928
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:924
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:920
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:916
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:912
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:908
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:900
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:892
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:888
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:884
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:880
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:876
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:872
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:868
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:864
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:860
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:856
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:852
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:848
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:844
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:840
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:836
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W64-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: hs_main:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:416
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s2 offset:336
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:880
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s2 offset:864
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: hs_main:
+; GFX10-FLATSCR:       ; %bb.0:
+; GFX10-FLATSCR-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: hs_main:
+; GFX9-FLATSCR-PAL:       ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT:    s_getpc_b64 s[0:1]
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, s8
+; GFX9-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX9-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX9-FLATSCR-PAL-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:416
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s2 offset:336
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:880
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s2 offset:864
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: hs_main:
+; GFX10-FLATSCR-PAL:       ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT:    s_getpc_b64 s[0:1]
+; GFX10-FLATSCR-PAL-NEXT:    s_mov_b32 s0, s8
+; GFX10-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX10-FLATSCR-PAL-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-PAL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-PAL-NEXT:    ; return to shader part epilog
   %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
   %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
   %r = fadd float %v1, %v2
   ret float %r
 }
 
-; GCN-LABEL: {{^}}gs_main:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
-; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; SIVI: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
-; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
 define amdgpu_gs float @gs_main(i32 %idx) {
+; SI-LABEL: gs_main:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_mov_b32 s3, 0xe8f000
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    s_add_u32 s0, s0, s4
+; SI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0x80
+; SI-NEXT:    s_addc_u32 s1, s1, 0
+; SI-NEXT:    v_add_i32_e32 v1, vcc, 0x280, v0
+; SI-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; SI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; SI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; SI-NEXT:    s_waitcnt expcnt(1)
+; SI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; SI-NEXT:    s_waitcnt expcnt(3)
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; SI-NEXT:    s_waitcnt expcnt(2)
+; SI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; SI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; SI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; SI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; SI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; SI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; SI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; SI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; SI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; SI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; SI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; SI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; SI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; SI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; SI-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_add_f32_e32 v0, v0, v1
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    ; return to shader part epilog
+;
+; VI-LABEL: gs_main:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_mov_b32 s4, s0
+; VI-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; VI-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_mov_b32 s3, 0xe80000
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_add_u32 s0, s0, s4
+; VI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0x80
+; VI-NEXT:    s_addc_u32 s1, s1, 0
+; VI-NEXT:    v_add_u32_e32 v1, vcc, 0x280, v0
+; VI-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; VI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; VI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; VI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; VI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; VI-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; VI-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; VI-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; VI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; VI-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; VI-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; VI-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; VI-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; VI-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; VI-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; VI-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; VI-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; VI-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; VI-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; VI-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; VI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; VI-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; VI-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v0, v0, v1
+; VI-NEXT:    ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: gs_main:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s3, 0xe00000
+; GFX9-MUBUF-NEXT:    s_add_u32 s0, s0, s5
+; GFX9-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:448
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:444
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:440
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:436
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:432
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:428
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:424
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:420
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:416
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:412
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:408
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:404
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:400
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:392
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:388
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:384
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:380
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:364
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:396
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:376
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:368
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:360
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:352
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:328
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v1, 0x280, v0
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v0, 0x80, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:324
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT:    s_nop 0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:960
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:956
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:952
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:948
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:944
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:940
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:936
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:932
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:928
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:924
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:920
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:916
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:912
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:908
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[0:3], 0 offset:900
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:892
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:888
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:884
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:880
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:876
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:872
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:868
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:864
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:860
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:856
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:852
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:848
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:844
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:840
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:836
+; GFX9-MUBUF-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: gs_main:
+; GFX10_W32-MUBUF:       ; %bb.0:
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s3, 0x31c16000
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT:    s_add_u32 s0, s0, s5
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:448
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:444
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:440
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:436
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:432
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:428
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:424
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:420
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:416
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:412
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:408
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:404
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:400
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:396
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:392
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:388
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:384
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:380
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:376
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:368
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:364
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:360
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:356
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:352
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:328
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:324
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:960
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:956
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:952
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:948
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:944
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:940
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:936
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:932
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:928
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:924
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:920
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:916
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:912
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:908
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:900
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:892
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:888
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:884
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:880
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:876
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:872
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:868
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:864
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:860
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:856
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:852
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:848
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:844
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:840
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:836
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W32-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: gs_main:
+; GFX10_W64-MUBUF:       ; %bb.0:
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s2, -1
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s3, 0x31e16000
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT:    s_add_u32 s0, s0, s5
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:448
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:444
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:440
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:436
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:432
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:428
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:424
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:420
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:416
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:412
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:408
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:404
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:400
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:396
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:392
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:388
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:384
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:380
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:376
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:372
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:368
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:364
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:360
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:356
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:352
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:348
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:344
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:340
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:336
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:332
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:328
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:324
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:960
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:956
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:952
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:948
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:944
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:940
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:936
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:932
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:928
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:924
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:920
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:916
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:912
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:908
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:904
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:900
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:896
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:892
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:888
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:884
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[0:3], 0 offset:880
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:876
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:872
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:868
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:864
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:860
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:856
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:852
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:848
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:844
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:840
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:836
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W64-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: gs_main:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:416
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s2 offset:336
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:880
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s2 offset:864
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: gs_main:
+; GFX10-FLATSCR:       ; %bb.0:
+; GFX10-FLATSCR-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: gs_main:
+; GFX9-FLATSCR-PAL:       ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT:    s_getpc_b64 s[0:1]
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, s8
+; GFX9-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX9-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX9-FLATSCR-PAL-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:416
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s2 offset:336
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:880
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s2 offset:864
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: gs_main:
+; GFX10-FLATSCR-PAL:       ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT:    s_getpc_b64 s[0:1]
+; GFX10-FLATSCR-PAL-NEXT:    s_mov_b32 s0, s8
+; GFX10-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX10-FLATSCR-PAL-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-PAL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-PAL-NEXT:    ; return to shader part epilog
   %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
   %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
   %r = fadd float %v1, %v2
   ret float %r
 }
 
-; Mesa GS and HS shaders have the preloaded scratch wave offset SGPR fixed at
-; SGPR5, and the inreg implementation is used to reference it in the IR. The
-; following tests confirm the shader and anything inserted after the return
-; (i.e. SI_RETURN_TO_EPILOG) can access the scratch wave offset.
-
-; GCN-LABEL: {{^}}hs_ir_uses_scratch_offset:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
-; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-
-; SIVI-NOT: s_mov_b32 s6
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; GFX9_10-NOT: s_mov_b32 s5
-; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; MUBUF-DAG: s_mov_b32 s2, s5
-
-; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
 define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
+; SI-LABEL: hs_ir_uses_scratch_offset:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s10, -1
+; SI-NEXT:    s_mov_b32 s11, 0xe8f000
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    s_add_u32 s8, s8, s6
+; SI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0x80
+; SI-NEXT:    s_addc_u32 s9, s9, 0
+; SI-NEXT:    v_add_i32_e32 v1, vcc, 0x280, v0
+; SI-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:448
+; SI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:444
+; SI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:440
+; SI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:436
+; SI-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:432
+; SI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:428
+; SI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:424
+; SI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:420
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:416
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:412
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:408
+; SI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:404
+; SI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:400
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:392
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:388
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:384
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:380
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:372
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:364
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:396
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:376
+; SI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:368
+; SI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:360
+; SI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:356
+; SI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:352
+; SI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:348
+; SI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:344
+; SI-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:340
+; SI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:336
+; SI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:332
+; SI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:328
+; SI-NEXT:    s_waitcnt expcnt(1)
+; SI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:324
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen
+; SI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:960
+; SI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:956
+; SI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:952
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:948
+; SI-NEXT:    s_waitcnt expcnt(3)
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:944
+; SI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:940
+; SI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:936
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:932
+; SI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:928
+; SI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:924
+; SI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:920
+; SI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:916
+; SI-NEXT:    s_waitcnt expcnt(2)
+; SI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:912
+; SI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:908
+; SI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:904
+; SI-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:900
+; SI-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:896
+; SI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:892
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:888
+; SI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:884
+; SI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:880
+; SI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:876
+; SI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:872
+; SI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:868
+; SI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:864
+; SI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:860
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:856
+; SI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:852
+; SI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:848
+; SI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:844
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:840
+; SI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:836
+; SI-NEXT:    buffer_load_dword v1, v1, s[8:11], 0 offen
+; SI-NEXT:    s_mov_b32 s2, s5
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_add_f32_e32 v0, v0, v1
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    ; return to shader part epilog
+;
+; VI-LABEL: hs_ir_uses_scratch_offset:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; VI-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; VI-NEXT:    s_mov_b32 s10, -1
+; VI-NEXT:    s_mov_b32 s11, 0xe80000
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_add_u32 s8, s8, s6
+; VI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0x80
+; VI-NEXT:    s_addc_u32 s9, s9, 0
+; VI-NEXT:    v_add_u32_e32 v1, vcc, 0x280, v0
+; VI-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:448
+; VI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:444
+; VI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:440
+; VI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:436
+; VI-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:432
+; VI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:428
+; VI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:424
+; VI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:420
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:416
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:412
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:408
+; VI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:404
+; VI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:400
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:392
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:388
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:384
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:380
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:372
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:364
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:396
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:376
+; VI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:368
+; VI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:360
+; VI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:356
+; VI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:352
+; VI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:348
+; VI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:344
+; VI-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:340
+; VI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:336
+; VI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:332
+; VI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:328
+; VI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:324
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen
+; VI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:960
+; VI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:956
+; VI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:952
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:948
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:944
+; VI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:940
+; VI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:936
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:932
+; VI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:928
+; VI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:924
+; VI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:920
+; VI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:916
+; VI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:912
+; VI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:908
+; VI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:904
+; VI-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:900
+; VI-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:896
+; VI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:892
+; VI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:888
+; VI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:884
+; VI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:880
+; VI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:876
+; VI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:872
+; VI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:868
+; VI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:864
+; VI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:860
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:856
+; VI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:852
+; VI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:848
+; VI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:844
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:840
+; VI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:836
+; VI-NEXT:    buffer_load_dword v1, v1, s[8:11], 0 offen
+; VI-NEXT:    s_mov_b32 s2, s5
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v0, v0, v1
+; VI-NEXT:    ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: hs_ir_uses_scratch_offset:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s10, -1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s11, 0xe00000
+; GFX9-MUBUF-NEXT:    s_add_u32 s8, s8, s5
+; GFX9-MUBUF-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:448
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:444
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:440
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:436
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:432
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:428
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:424
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:420
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:416
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:412
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:408
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:404
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:400
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:392
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:388
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:384
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:380
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:372
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:364
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:396
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:376
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:368
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:360
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:352
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:348
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:344
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:340
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:336
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:332
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:328
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v1, 0x280, v0
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v0, 0x80, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:324
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX9-MUBUF-NEXT:    s_nop 0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:960
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:956
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:952
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:948
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:944
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:940
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:936
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:932
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:928
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:924
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:920
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:916
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:912
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:908
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:904
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:900
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:896
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:892
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:888
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:884
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:880
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:876
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:872
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:868
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:864
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:860
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:856
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:852
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:848
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:844
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:840
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:836
+; GFX9-MUBUF-NEXT:    buffer_load_dword v1, v1, s[8:11], 0 offen
+; GFX9-MUBUF-NEXT:    s_mov_b32 s2, s5
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: hs_ir_uses_scratch_offset:
+; GFX10_W32-MUBUF:       ; %bb.0:
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s10, -1
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s11, 0x31c16000
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT:    s_add_u32 s8, s8, s5
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT:    s_addc_u32 s9, s9, 0
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:448
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:444
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:440
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:436
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:432
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:428
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:424
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:420
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:416
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:412
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:408
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:404
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:400
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:396
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:392
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:388
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:384
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:380
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:376
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:372
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:368
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:364
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:360
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:356
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:352
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:348
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:344
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:340
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:336
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:332
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:328
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:324
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:960
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:956
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:952
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:948
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:944
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:940
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:936
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:932
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:928
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:924
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:920
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:916
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[8:11], 0 offset:912
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:908
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:904
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:900
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:896
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:892
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:888
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:884
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[8:11], 0 offset:880
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:876
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:872
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:868
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:864
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:860
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:856
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:852
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:848
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:844
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:840
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:836
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v1, v6, s[8:11], 0 offen
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s2, s5
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W32-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: hs_ir_uses_scratch_offset:
+; GFX10_W64-MUBUF:       ; %bb.0:
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s10, -1
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s11, 0x31e16000
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT:    s_add_u32 s8, s8, s5
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT:    s_addc_u32 s9, s9, 0
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:448
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:444
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:440
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:436
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:432
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:428
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:424
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:420
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:416
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:412
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:408
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:404
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:400
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:396
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:392
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:388
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:384
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:380
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:376
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:372
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:368
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:364
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:360
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:356
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:352
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:348
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:344
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:340
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:336
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:332
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:328
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:324
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:960
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:956
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:952
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:948
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:944
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:940
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:936
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:932
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:928
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:924
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:920
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:916
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[8:11], 0 offset:912
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:908
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:904
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:900
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:896
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:892
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:888
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:884
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[8:11], 0 offset:880
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:876
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:872
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:868
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:864
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:860
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:856
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:852
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:848
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:844
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:840
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:836
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v1, v6, s[8:11], 0 offen
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s2, s5
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W64-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: hs_ir_uses_scratch_offset:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 offset:416
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:336
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s4 offset:880
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s3 offset:864
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, s7
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: hs_ir_uses_scratch_offset:
+; GFX10-FLATSCR:       ; %bb.0:
+; GFX10-FLATSCR-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT:    s_mov_b32 s2, s7
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: hs_ir_uses_scratch_offset:
+; GFX9-FLATSCR-PAL:       ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT:    s_getpc_b64 s[0:1]
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, s8
+; GFX9-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX9-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX9-FLATSCR-PAL-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 offset:416
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:336
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s4, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s4 offset:880
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s3 offset:864
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, s5
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: hs_ir_uses_scratch_offset:
+; GFX10-FLATSCR-PAL:       ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT:    s_getpc_b64 s[0:1]
+; GFX10-FLATSCR-PAL-NEXT:    s_mov_b32 s0, s8
+; GFX10-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX10-FLATSCR-PAL-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-PAL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT:    s_mov_b32 s2, s5
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-PAL-NEXT:    ; return to shader part epilog
   %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
   %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
   %f = fadd float %v1, %v2
@@ -317,50 +5954,994 @@ define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg,
   ret <{i32, i32, i32, float}> %r2
 }
 
-; GCN-LABEL: {{^}}gs_ir_uses_scratch_offset:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
-; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; MUBUF-DAG: s_mov_b32 s2, s5
-
-; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
 define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
+; SI-LABEL: gs_ir_uses_scratch_offset:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s10, -1
+; SI-NEXT:    s_mov_b32 s11, 0xe8f000
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    s_add_u32 s8, s8, s6
+; SI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0x80
+; SI-NEXT:    s_addc_u32 s9, s9, 0
+; SI-NEXT:    v_add_i32_e32 v1, vcc, 0x280, v0
+; SI-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:448
+; SI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:444
+; SI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:440
+; SI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:436
+; SI-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:432
+; SI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:428
+; SI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:424
+; SI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:420
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:416
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:412
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:408
+; SI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:404
+; SI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:400
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:392
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:388
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:384
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:380
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:372
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:364
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:396
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:376
+; SI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:368
+; SI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:360
+; SI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:356
+; SI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:352
+; SI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:348
+; SI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:344
+; SI-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:340
+; SI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:336
+; SI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:332
+; SI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:328
+; SI-NEXT:    s_waitcnt expcnt(1)
+; SI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:324
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen
+; SI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:960
+; SI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:956
+; SI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:952
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:948
+; SI-NEXT:    s_waitcnt expcnt(3)
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:944
+; SI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:940
+; SI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:936
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:932
+; SI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:928
+; SI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:924
+; SI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:920
+; SI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:916
+; SI-NEXT:    s_waitcnt expcnt(2)
+; SI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:912
+; SI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:908
+; SI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:904
+; SI-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:900
+; SI-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:896
+; SI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:892
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:888
+; SI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:884
+; SI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:880
+; SI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:876
+; SI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:872
+; SI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:868
+; SI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:864
+; SI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:860
+; SI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:856
+; SI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:852
+; SI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:848
+; SI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:844
+; SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:840
+; SI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:836
+; SI-NEXT:    buffer_load_dword v1, v1, s[8:11], 0 offen
+; SI-NEXT:    s_mov_b32 s2, s5
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_add_f32_e32 v0, v0, v1
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    ; return to shader part epilog
+;
+; VI-LABEL: gs_ir_uses_scratch_offset:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; VI-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; VI-NEXT:    s_mov_b32 s10, -1
+; VI-NEXT:    s_mov_b32 s11, 0xe80000
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_add_u32 s8, s8, s6
+; VI-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0x80
+; VI-NEXT:    s_addc_u32 s9, s9, 0
+; VI-NEXT:    v_add_u32_e32 v1, vcc, 0x280, v0
+; VI-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:448
+; VI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:444
+; VI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:440
+; VI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:436
+; VI-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:432
+; VI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:428
+; VI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:424
+; VI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:420
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:416
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:412
+; VI-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:408
+; VI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:404
+; VI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:400
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:392
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:388
+; VI-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:384
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:380
+; VI-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:372
+; VI-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:364
+; VI-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:396
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:376
+; VI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:368
+; VI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:360
+; VI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:356
+; VI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:352
+; VI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:348
+; VI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:344
+; VI-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:340
+; VI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:336
+; VI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:332
+; VI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:328
+; VI-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:324
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen
+; VI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:960
+; VI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:956
+; VI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:952
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:948
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:944
+; VI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:940
+; VI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:936
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:932
+; VI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:928
+; VI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:924
+; VI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:920
+; VI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:916
+; VI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:912
+; VI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:908
+; VI-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:904
+; VI-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:900
+; VI-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:896
+; VI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:892
+; VI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:888
+; VI-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:884
+; VI-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:880
+; VI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:876
+; VI-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:872
+; VI-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:868
+; VI-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:864
+; VI-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:860
+; VI-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:856
+; VI-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:852
+; VI-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:848
+; VI-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:844
+; VI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:840
+; VI-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:836
+; VI-NEXT:    buffer_load_dword v1, v1, s[8:11], 0 offen
+; VI-NEXT:    s_mov_b32 s2, s5
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v0, v0, v1
+; VI-NEXT:    ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: gs_ir_uses_scratch_offset:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s10, -1
+; GFX9-MUBUF-NEXT:    s_mov_b32 s11, 0xe00000
+; GFX9-MUBUF-NEXT:    s_add_u32 s8, s8, s5
+; GFX9-MUBUF-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:448
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:444
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:440
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:436
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:432
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:428
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:424
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:420
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:416
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:412
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:408
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:404
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:400
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:392
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:388
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:384
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:380
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:372
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:364
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:396
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:376
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:368
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:360
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:356
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:352
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:348
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:344
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:340
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:336
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:332
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:328
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v1, 0x280, v0
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v0, 0x80, v0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:324
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX9-MUBUF-NEXT:    s_nop 0
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:960
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:956
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:952
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:948
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:944
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:940
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:936
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:932
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:928
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:924
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:920
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:916
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:912
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:908
+; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:904
+; GFX9-MUBUF-NEXT:    buffer_store_dword v6, off, s[8:11], 0 offset:900
+; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:896
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:892
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:888
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:884
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:880
+; GFX9-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:876
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:872
+; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:868
+; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:864
+; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:860
+; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:856
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:852
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:848
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:844
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:840
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:836
+; GFX9-MUBUF-NEXT:    buffer_load_dword v1, v1, s[8:11], 0 offen
+; GFX9-MUBUF-NEXT:    s_mov_b32 s2, s5
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: gs_ir_uses_scratch_offset:
+; GFX10_W32-MUBUF:       ; %bb.0:
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s10, -1
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s11, 0x31c16000
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT:    s_add_u32 s8, s8, s5
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT:    s_addc_u32 s9, s9, 0
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:448
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:444
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:440
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:436
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:432
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:428
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:424
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:420
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:416
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:412
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:408
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:404
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:400
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:396
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:392
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:388
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:384
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:380
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:376
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:372
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:368
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:364
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:360
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W32-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:356
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:352
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:348
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:344
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:340
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:336
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:332
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:328
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:324
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:960
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:956
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:952
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:948
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:944
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:940
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:936
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:932
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:928
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:924
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:920
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:916
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[8:11], 0 offset:912
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:908
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:904
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:900
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:896
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:892
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:888
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:884
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v19, off, s[8:11], 0 offset:880
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:876
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:872
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:868
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:864
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:860
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:856
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:852
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:848
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:844
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:840
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:836
+; GFX10_W32-MUBUF-NEXT:    buffer_load_dword v1, v6, s[8:11], 0 offen
+; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s2, s5
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W32-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: gs_ir_uses_scratch_offset:
+; GFX10_W64-MUBUF:       ; %bb.0:
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s10, -1
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s11, 0x31e16000
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT:    s_add_u32 s8, s8, s5
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT:    s_addc_u32 s9, s9, 0
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:448
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:444
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:440
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:436
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:432
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:428
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:424
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:420
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:416
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:412
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:408
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:404
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:400
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:396
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:392
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:388
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:384
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:380
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:376
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:372
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:368
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:364
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:360
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v6, 0x280, v0
+; GFX10_W64-MUBUF-NEXT:    v_add_nc_u32_e32 v0, 0x80, v0
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:356
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:352
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:348
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:344
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:340
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:336
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:332
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:328
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:324
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:960
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:956
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:952
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:948
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:944
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:940
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:936
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:932
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:928
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:924
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:920
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:916
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[8:11], 0 offset:912
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:908
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[8:11], 0 offset:904
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[8:11], 0 offset:900
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v16, off, s[8:11], 0 offset:896
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[8:11], 0 offset:892
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[8:11], 0 offset:888
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:884
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v19, off, s[8:11], 0 offset:880
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:876
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[8:11], 0 offset:872
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[8:11], 0 offset:868
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v13, off, s[8:11], 0 offset:864
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v11, off, s[8:11], 0 offset:860
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[8:11], 0 offset:856
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v18, off, s[8:11], 0 offset:852
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:848
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v9, off, s[8:11], 0 offset:844
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v8, off, s[8:11], 0 offset:840
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[8:11], 0 offset:836
+; GFX10_W64-MUBUF-NEXT:    buffer_load_dword v1, v6, s[8:11], 0 offen
+; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s2, s5
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10_W64-MUBUF-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: gs_ir_uses_scratch_offset:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 offset:416
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:336
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], s4 offset:880
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[7:10], s3 offset:864
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s2, s7
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: gs_ir_uses_scratch_offset:
+; GFX10-FLATSCR:       ; %bb.0:
+; GFX10-FLATSCR-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT:    s_mov_b32 s2, s7
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-NEXT:    ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: gs_ir_uses_scratch_offset:
+; GFX9-FLATSCR-PAL:       ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT:    s_getpc_b64 s[0:1]
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, s8
+; GFX9-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX9-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX9-FLATSCR-PAL-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 offset:416
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:400
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:384
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s3 offset:336
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s1 offset:320
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x80, v23
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 offset:960
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s4, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[18:21], s4 offset:880
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[7:10], s3 offset:864
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[3:6], s1 offset:944
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 offset:928
+; GFX9-FLATSCR-PAL-NEXT:    v_add_u32_e32 v1, 0x280, v23
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848
+; GFX9-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832
+; GFX9-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT:    s_mov_b32 s2, s5
+; GFX9-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT:    ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: gs_ir_uses_scratch_offset:
+; GFX10-FLATSCR-PAL:       ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT:    s_getpc_b64 s[0:1]
+; GFX10-FLATSCR-PAL-NEXT:    s_mov_b32 s0, s8
+; GFX10-FLATSCR-PAL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX10-FLATSCR-PAL-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-PAL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:448
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[5:8], off offset:432
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[1:4], off offset:416
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[9:12], off offset:400
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v39, 0x280, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_add_nc_u32_e32 v35, 0x80, v0
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:384
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:368
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:352
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:336
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:320
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[4:7], off offset:960
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[27:30], off offset:944
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:928
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:912
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT:    v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[35:38], off offset:896
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[31:34], off offset:880
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:864
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], off offset:848
+; GFX10-FLATSCR-PAL-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:832
+; GFX10-FLATSCR-PAL-NEXT:    scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT:    s_mov_b32 s2, s5
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT:    v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-FLATSCR-PAL-NEXT:    ; return to shader part epilog
   %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
   %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
   %f = fadd float %v1, %v2
@@ -368,3 +6949,10 @@ define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg,
   %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3
   ret <{i32, i32, i32, float}> %r2
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FLATSCR: {{.*}}
+; GCN: {{.*}}
+; GFX9_10: {{.*}}
+; GFX9_10-MUBUF: {{.*}}
+; MUBUF: {{.*}}
+; SIVI: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
index 6fca8187f6e2..2fca0021f5f9 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
@@ -235,6 +235,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -247,6 +248,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
     S_NOP 0
 
   bb.2:
@@ -286,7 +288,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -301,7 +303,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
-    liveins: $sgpr8
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
     S_NOP 0
 
   bb.2:
@@ -343,6 +345,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -355,6 +358,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
     S_NOP 0
 
   bb.2:
@@ -395,7 +399,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8_sgpr9
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -410,7 +414,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
-    liveins: $sgpr8_sgpr9
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
     S_NOP 0
 
   bb.2:
@@ -440,7 +444,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
+  ; VMEM-GFX8-NEXT:   $sgpr6_sgpr7 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0
@@ -448,22 +452,23 @@ body:             |
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr6_sgpr7
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1, implicit-def $vgpr1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = V_WRITELANE_B32 $sgpr9, 0, undef $vgpr1
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr1, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr9, 0, undef $vgpr0
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -477,6 +482,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, 
$sgpr9
     S_NOP 0
 
   bb.2:
@@ -502,7 +508,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
+  ; VMEM-GFX8-NEXT:   $sgpr6_sgpr7 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
@@ -510,23 +516,23 @@ body:             |
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = V_READLANE_B32 killed $vgpr0, 0
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr6_sgpr7
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1, implicit-def $vgpr1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_MOV_B32_e32 16392, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $sgpr9 = V_READLANE_B32 killed $vgpr1, 0
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 16392, implicit $exec
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $sgpr9 = V_READLANE_B32 killed $vgpr0, 0
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8, $sgpr9
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -542,7 +548,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
-    liveins: $sgpr8, $sgpr9
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
     S_NOP 0
 
   bb.2:

diff  --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
index 076e891b16bb..87ccf7a930ef 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
@@ -1,35 +1,10 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=false -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s
 
 # Check that we allocate 2 emergency stack slots if we're spilling
 # SGPRs to memory and potentially have an offset larger than fits in
 # the addressing mode of the memory instructions.
 
-# CHECK-LABEL: name: test
-# CHECK: stack:
-# CHECK-NEXT: - { id: 0, name: '', type: spill-slot, offset: 8, size: 4, alignment: 4,
-# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-# CHECK-NEXT: - { id: 1, name: '', type: default, offset: 12, size: 4096, alignment: 4,
-# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-# CHECK-NEXT: - { id: 2, name: '', type: default, offset: 0, size: 4, alignment: 4,
-# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-# CHECK-NEXT: - { id: 3, name: '', type: default, offset: 4, size: 4, alignment: 4,
-# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-
-
-# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-# CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr1
-# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-# CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-
-
-# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-# CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-# CHECK-NEXT: $sgpr10 = V_READLANE_B32 killed $vgpr1, 0
-# CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
 ---
 name:            test
 tracksRegLiveness: true
@@ -47,6 +22,25 @@ machineFunctionInfo:
 body:             |
   bb.0:
     liveins: $sgpr30_sgpr31, $sgpr10, $sgpr11
+    ; CHECK-LABEL: name: test
+    ; CHECK: liveins: $sgpr30_sgpr31, $sgpr10, $sgpr11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
+    ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr2
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr2
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr2
+    ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $sgpr10 = V_READLANE_B32 killed $vgpr1, 0
+    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
+    ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $scc
     S_CMP_EQ_U32 0, 0, implicit-def $scc
     SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
     renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32

diff  --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
index c0f70c7ef2ac..8c0211ac2506 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
@@ -253,81 +253,81 @@ body:             |
     ; FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc
     ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF
     ; FLATSCR: INLINEASM &"", 0 /* attdialect */, implicit-def $vgpr0
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
     ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo
     ; FLATSCR: $exec_lo = S_MOV_B32 1
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; FLATSCR: $exec_lo = S_MOV_B32 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: INLINEASM &"", 1 /* sideeffect attdialect */, implicit $vgpr0
     ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12
     ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
     ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo
     ; FLATSCR: $exec_lo = S_MOV_B32 1
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12
     ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 3
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 3
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1
     ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 7
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
@@ -335,14 +335,14 @@ body:             |
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 15
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
@@ -351,14 +351,14 @@ body:             |
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 31
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
@@ -370,14 +370,14 @@ body:             |
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 255
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
@@ -397,14 +397,14 @@ body:             |
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 65535
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
@@ -440,9 +440,9 @@ body:             |
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; FLATSCR: $sgpr64_sgpr65 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 4294967295
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr64_sgpr65
     ; GCN64-MUBUF-LABEL: name: check_spill
     ; GCN64-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11
@@ -455,13 +455,13 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr28 = S_ADD_U32 $sgpr28, $sgpr11, implicit-def $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
     ; GCN64-MUBUF-NEXT: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
@@ -471,33 +471,33 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr3
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr3
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
@@ -510,32 +510,32 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr2
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
@@ -560,44 +560,44 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
@@ -618,13 +618,13 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GCN32-MUBUF-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr1
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
@@ -634,33 +634,33 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr2
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr2
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr3
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr3
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr1
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0
@@ -673,32 +673,32 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr2
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr2
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr1
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0
@@ -723,44 +723,44 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
@@ -777,51 +777,51 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
     ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr3
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr3
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
@@ -830,36 +830,36 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr2
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
@@ -880,55 +880,55 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, align 4096, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, align 4096, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     renamable $sgpr12 = IMPLICIT_DEF
     SI_SPILL_S32_SAVE killed $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
 
@@ -1011,30 +1011,30 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr31 = S_MOV_B32 14680064, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
     ; GCN64-MUBUF-NEXT: $sgpr28 = S_ADD_U32 $sgpr28, $sgpr11, implicit-def $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
     ; GCN64-MUBUF-NEXT: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
+    ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2
+    ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr3
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1
+    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2
+    ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr3
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1045,31 +1045,31 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1
+    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2
+    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3
+    ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
+    ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr2
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1
+    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2
+    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3
+    ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4
+    ; GCN64-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5
+    ; GCN64-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6
+    ; GCN64-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1092,44 +1092,44 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7
-    ; GCN64-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8
-    ; GCN64-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9
-    ; GCN64-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10
-    ; GCN64-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11
-    ; GCN64-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12
-    ; GCN64-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13
-    ; GCN64-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14
-    ; GCN64-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15
-    ; GCN64-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16
-    ; GCN64-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17
-    ; GCN64-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18
-    ; GCN64-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19
-    ; GCN64-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20
-    ; GCN64-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21
-    ; GCN64-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22
-    ; GCN64-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23
-    ; GCN64-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24
-    ; GCN64-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25
-    ; GCN64-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26
-    ; GCN64-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27
-    ; GCN64-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28
-    ; GCN64-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29
-    ; GCN64-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30
-    ; GCN64-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1
+    ; GCN64-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2
+    ; GCN64-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3
+    ; GCN64-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4
+    ; GCN64-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5
+    ; GCN64-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6
+    ; GCN64-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7
+    ; GCN64-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8
+    ; GCN64-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9
+    ; GCN64-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10
+    ; GCN64-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11
+    ; GCN64-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12
+    ; GCN64-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13
+    ; GCN64-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14
+    ; GCN64-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15
+    ; GCN64-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16
+    ; GCN64-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17
+    ; GCN64-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18
+    ; GCN64-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19
+    ; GCN64-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20
+    ; GCN64-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21
+    ; GCN64-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22
+    ; GCN64-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23
+    ; GCN64-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24
+    ; GCN64-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25
+    ; GCN64-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26
+    ; GCN64-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27
+    ; GCN64-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28
+    ; GCN64-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29
+    ; GCN64-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30
+    ; GCN64-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1148,30 +1148,30 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr99 = S_MOV_B32 834756608, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GCN32-MUBUF-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GCN32-MUBUF-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1
+    ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr2
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr2
+    ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr3
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1
+    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2
+    ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr3
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1182,31 +1182,31 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN32-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN32-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN32-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1
+    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2
+    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3
+    ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1
+    ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr2
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1
+    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2
+    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3
+    ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4
+    ; GCN32-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5
+    ; GCN32-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6
+    ; GCN32-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr2
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1229,44 +1229,44 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4
-    ; GCN32-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5
-    ; GCN32-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6
-    ; GCN32-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7
-    ; GCN32-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8
-    ; GCN32-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9
-    ; GCN32-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10
-    ; GCN32-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11
-    ; GCN32-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12
-    ; GCN32-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13
-    ; GCN32-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14
-    ; GCN32-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15
-    ; GCN32-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16
-    ; GCN32-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17
-    ; GCN32-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18
-    ; GCN32-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19
-    ; GCN32-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20
-    ; GCN32-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21
-    ; GCN32-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22
-    ; GCN32-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23
-    ; GCN32-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24
-    ; GCN32-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25
-    ; GCN32-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26
-    ; GCN32-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27
-    ; GCN32-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28
-    ; GCN32-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29
-    ; GCN32-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30
-    ; GCN32-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1
+    ; GCN32-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2
+    ; GCN32-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3
+    ; GCN32-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4
+    ; GCN32-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5
+    ; GCN32-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6
+    ; GCN32-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7
+    ; GCN32-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8
+    ; GCN32-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9
+    ; GCN32-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10
+    ; GCN32-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11
+    ; GCN32-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12
+    ; GCN32-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13
+    ; GCN32-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14
+    ; GCN32-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15
+    ; GCN32-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16
+    ; GCN32-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17
+    ; GCN32-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18
+    ; GCN32-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19
+    ; GCN32-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20
+    ; GCN32-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21
+    ; GCN32-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22
+    ; GCN32-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23
+    ; GCN32-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24
+    ; GCN32-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25
+    ; GCN32-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26
+    ; GCN32-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27
+    ; GCN32-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28
+    ; GCN32-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29
+    ; GCN32-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30
+    ; GCN32-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1281,31 +1281,31 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr33 = S_MOV_B32 0
     ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
     ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2
+    ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr3
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2
+    ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr3
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
@@ -1314,33 +1314,33 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
     ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-FLATSCR-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-FLATSCR-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-FLATSCR-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3
+    ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr2
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3
+    ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4
+    ; GCN64-FLATSCR-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5
+    ; GCN64-FLATSCR-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6
+    ; GCN64-FLATSCR-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.6, addrspace 5)
@@ -1361,53 +1361,53 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr26 = V_READLANE_B32 $vgpr0, 14
     ; GCN64-FLATSCR-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-FLATSCR-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-FLATSCR-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-FLATSCR-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7
-    ; GCN64-FLATSCR-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8
-    ; GCN64-FLATSCR-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9
-    ; GCN64-FLATSCR-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10
-    ; GCN64-FLATSCR-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11
-    ; GCN64-FLATSCR-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12
-    ; GCN64-FLATSCR-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13
-    ; GCN64-FLATSCR-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14
-    ; GCN64-FLATSCR-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15
-    ; GCN64-FLATSCR-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16
-    ; GCN64-FLATSCR-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17
-    ; GCN64-FLATSCR-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18
-    ; GCN64-FLATSCR-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19
-    ; GCN64-FLATSCR-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20
-    ; GCN64-FLATSCR-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21
-    ; GCN64-FLATSCR-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22
-    ; GCN64-FLATSCR-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23
-    ; GCN64-FLATSCR-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24
-    ; GCN64-FLATSCR-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25
-    ; GCN64-FLATSCR-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26
-    ; GCN64-FLATSCR-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27
-    ; GCN64-FLATSCR-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28
-    ; GCN64-FLATSCR-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29
-    ; GCN64-FLATSCR-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30
-    ; GCN64-FLATSCR-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3
+    ; GCN64-FLATSCR-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4
+    ; GCN64-FLATSCR-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5
+    ; GCN64-FLATSCR-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6
+    ; GCN64-FLATSCR-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7
+    ; GCN64-FLATSCR-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8
+    ; GCN64-FLATSCR-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9
+    ; GCN64-FLATSCR-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10
+    ; GCN64-FLATSCR-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11
+    ; GCN64-FLATSCR-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12
+    ; GCN64-FLATSCR-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13
+    ; GCN64-FLATSCR-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14
+    ; GCN64-FLATSCR-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15
+    ; GCN64-FLATSCR-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16
+    ; GCN64-FLATSCR-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17
+    ; GCN64-FLATSCR-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18
+    ; GCN64-FLATSCR-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19
+    ; GCN64-FLATSCR-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20
+    ; GCN64-FLATSCR-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21
+    ; GCN64-FLATSCR-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22
+    ; GCN64-FLATSCR-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23
+    ; GCN64-FLATSCR-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24
+    ; GCN64-FLATSCR-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25
+    ; GCN64-FLATSCR-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26
+    ; GCN64-FLATSCR-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27
+    ; GCN64-FLATSCR-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28
+    ; GCN64-FLATSCR-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29
+    ; GCN64-FLATSCR-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30
+    ; GCN64-FLATSCR-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, align 4096, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, align 4096, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
 
     renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

diff  --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
index 515253e6a43f..2b7635fede0b 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
@@ -1,22 +1,275 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx803 -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 define void @child_function() #0 {
+; GCN-LABEL: child_function:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "", "~{vcc}" () #0
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_sgpr_with_no_lower_vgpr_available:
-; GCN:  buffer_store_dword v255, off, s[0:3], s32
-; GCN:  v_writelane_b32 v255, s33, 2
-; GCN:  v_writelane_b32 v255, s30, 0
-; GCN:  v_writelane_b32 v255, s31, 1
-; GCN:  s_swappc_b64 s[30:31], s[4:5]
-; GCN:  v_readlane_b32 s31, v255, 1
-; GCN:  v_readlane_b32 s30, v255, 0
-; GCN:  v_readlane_b32 s33, v255, 2
-; GCN: ; NumVgprs: 256
-
 define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
+; GCN-LABEL: spill_sgpr_with_no_lower_vgpr_available:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_store_dword v255, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    v_writelane_b32 v255, s33, 2
+; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    s_add_i32 s32, s32, 0x7400
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT:    v_writelane_b32 v255, s30, 0
+; GCN-NEXT:    v_writelane_b32 v255, s31, 1
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:444
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, child_function@gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    s_mov_b64 s[10:11], s[2:3]
+; GCN-NEXT:    s_mov_b64 s[8:9], s[0:1]
+; GCN-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; GCN-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    v_readlane_b32 s31, v255, 1
+; GCN-NEXT:    v_readlane_b32 s30, v255, 0
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload
+; GCN-NEXT:    s_add_i32 s32, s32, 0xffff8c00
+; GCN-NEXT:    v_readlane_b32 s33, v255, 2
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_load_dword v255, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
 
@@ -51,17 +304,262 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_to_lowest_available_vgpr:
-; GCN:  buffer_store_dword v254, off, s[0:3], s32
-; GCN:  v_writelane_b32 v254, s33, 2
-; GCN:  v_writelane_b32 v254, s30, 0
-; GCN:  v_writelane_b32 v254, s31, 1
-; GCN:  s_swappc_b64 s[30:31], s[4:5]
-; GCN:  v_readlane_b32 s31, v254, 1
-; GCN:  v_readlane_b32 s30, v254, 0
-; GCN:  v_readlane_b32 s33, v254, 2
-
 define void @spill_to_lowest_available_vgpr() #0 {
+; GCN-LABEL: spill_to_lowest_available_vgpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    v_writelane_b32 v254, s33, 2
+; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    s_add_i32 s32, s32, 0x7400
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT:    v_writelane_b32 v254, s30, 0
+; GCN-NEXT:    v_writelane_b32 v254, s31, 1
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:440
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, child_function@gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    s_mov_b64 s[10:11], s[2:3]
+; GCN-NEXT:    s_mov_b64 s[8:9], s[0:1]
+; GCN-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; GCN-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    v_readlane_b32 s31, v254, 1
+; GCN-NEXT:    v_readlane_b32 s30, v254, 0
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    s_add_i32 s32, s32, 0xffff8c00
+; GCN-NEXT:    v_readlane_b32 s33, v254, 2
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
 
@@ -96,14 +594,254 @@ define void @spill_to_lowest_available_vgpr() #0 {
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_sgpr_with_sgpr_uses:
-; GCN-NOT:  buffer_store_dword v255, off, s[0:3], s32
-; GCN: ; def s4
-; GCN: v_writelane_b32 v254, s4, 0
-; GCN: v_readlane_b32 s4, v254, 0
-; GCN: ; use s4
-
 define void @spill_sgpr_with_sgpr_uses() #0 {
+; GCN-LABEL: spill_sgpr_with_sgpr_uses:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:440
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s4
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v254, s4, 0
+; GCN-NEXT:    s_cbranch_scc1 .LBB3_2
+; GCN-NEXT:  ; %bb.1: ; %bb0
+; GCN-NEXT:    v_readlane_b32 s4, v254, 0
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use s4
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:  .LBB3_2: ; %ret
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
 
@@ -147,12 +885,243 @@ ret:
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_sgpr_with_tail_call
-; GCN-NOT:  buffer_store_dword v255, off, s[0:3], s32
-; GCN-NOT:  v_writelane
-; GCN:  s_setpc_b64 s[4:5]
-
 define void @spill_sgpr_with_tail_call() #0 {
+; GCN-LABEL: spill_sgpr_with_tail_call:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:444
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, child_function@gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[4:5]
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
 
@@ -187,29 +1156,281 @@ define void @spill_sgpr_with_tail_call() #0 {
   ret void
 }
 
-; Special case where all registers are explicitly clobbered in the function and
-; we have no VGPR to allocate for SGPR spills. We are forced to spill to memory.
-
-; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr:
-; GCN: v_writelane_b32 v{{[0-9]+}}, s34, 0
-; GCN: v_writelane_b32 v{{[0-9]+}}, s35, 1
-; GCN: v_writelane_b32 v{{[0-9]+}}, s36, 2
-; GCN: v_writelane_b32 v{{[0-9]+}}, s37, 3
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
-; GCN: #ASMEND
-; GCN: buffer_load_dword v{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}
-; GCN: v_readlane_b32 s37, v{{[0-9]+}}, 3
-; GCN: v_readlane_b32 s36, v{{[0-9]+}}, 2
-; GCN: v_readlane_b32 s35, v{{[0-9]+}}, 1
-; GCN: v_readlane_b32 s34, v{{[0-9]+}}, 0
-
 define void @spill_sgpr_no_free_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+; GCN-LABEL: spill_sgpr_no_free_vgpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v255, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT:    v_writelane_b32 v4, s34, 0
+; GCN-NEXT:    v_writelane_b32 v4, s35, 1
+; GCN-NEXT:    v_writelane_b32 v4, s36, 2
+; GCN-NEXT:    v_writelane_b32 v4, s37, 3
+; GCN-NEXT:    v_mov_b32_e32 v5, v3
+; GCN-NEXT:    v_mov_b32_e32 v3, v1
+; GCN-NEXT:    ; implicit-def: $sgpr4
+; GCN-NEXT:    ; implicit-def: $sgpr4
+; GCN-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $exec
+; GCN-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GCN-NEXT:    v_mov_b32_e32 v1, v3
+; GCN-NEXT:    ; implicit-def: $sgpr4
+; GCN-NEXT:    ; implicit-def: $sgpr4
+; GCN-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $exec
+; GCN-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GCN-NEXT:    v_mov_b32_e32 v3, v5
+; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
+; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
+; GCN-NEXT:    flat_load_dwordx4 v[5:8], v[2:3]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    flat_store_dwordx4 v[0:1], v[5:8]
+; GCN-NEXT:    v_readlane_b32 s37, v4, 3
+; GCN-NEXT:    v_readlane_b32 s36, v4, 2
+; GCN-NEXT:    v_readlane_b32 s35, v4, 1
+; GCN-NEXT:    v_readlane_b32 s34, v4, 0
+; GCN-NEXT:    buffer_load_dword v255, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
   call void asm sideeffect "",
   "~{v6},~{v7},~{v8},~{v9}
@@ -246,11 +1467,13 @@ define void @spill_sgpr_no_free_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> add
   ret void
 }
 
-; If IPRA no-CSR optimization is enabled, we will not be able to allocate an
-; SGPR for VGPR spills in the parent function since this child function uses all
-; VGPRs.
-
 define internal void @child_function_ipra() #0 {
+; GCN-LABEL: child_function_ipra:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "",
   "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
   ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
@@ -281,22 +1504,519 @@ define internal void @child_function_ipra() #0 {
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr_ipra:
-; GCN: v_writelane_b32 v0, s30, 0
-; GCN: buffer_store_dword v0, off
-; GCN: v_writelane_b32 v0, s31, 0
-; GCN: buffer_store_dword v0, off
-; GCN: swappc
-; GCN: buffer_load_dword v0, off
-; GCN: v_readlane_b32 s31, v0, 0
-; GCN: buffer_load_dword v0, off
-; GCN: v_readlane_b32 s30, v0, 0
 define void @spill_sgpr_no_free_vgpr_ipra() #0 {
+; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s6, s33
+; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    s_add_i32 s32, s32, 0x7400
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 s[14:15], exec
+; GCN-NEXT:    s_mov_b64 exec, 1
+; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:456
+; GCN-NEXT:    v_writelane_b32 v1, s30, 0
+; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:456
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_mov_b64 exec, s[14:15]
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_mov_b64 exec, 1
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:456
+; GCN-NEXT:    v_writelane_b32 v0, s31, 0
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:456
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_mov_b64 exec, s[12:13]
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, child_function_ipra@rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, child_function_ipra@rel32@hi+12
+; GCN-NEXT:    s_mov_b64 s[10:11], s[2:3]
+; GCN-NEXT:    s_mov_b64 s[8:9], s[0:1]
+; GCN-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; GCN-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    s_mov_b64 s[8:9], exec
+; GCN-NEXT:    s_mov_b64 exec, 1
+; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:456
+; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_readlane_b32 s31, v1, 0
+; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:456
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_mov_b64 exec, s[8:9]
+; GCN-NEXT:    s_mov_b64 s[4:5], exec
+; GCN-NEXT:    s_mov_b64 exec, 1
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:456
+; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_readlane_b32 s30, v0, 0
+; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:456
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    s_add_i32 s32, s32, 0xffff8c00
+; GCN-NEXT:    s_mov_b32 s33, s6
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   call void @child_function_ipra()
   ret void
 }
 
 define internal void @child_function_ipra_tail_call() #0 {
+; GCN-LABEL: child_function_ipra_tail_call:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v255, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    buffer_load_dword v255, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "",
   "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
   ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
@@ -327,14 +2047,14 @@ define internal void @child_function_ipra_tail_call() #0 {
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr_ipra_tail_call:
-; GCN-NOT: v_writelane_b32
-; GCN-NOT: buffer_store_dword
-; GCN-NOT: swappc
-; GCN-NOT: buffer_load_dword v0, off
-; GCN-NOT: v_readlane_b32
-; GCN: setpc
 define void @spill_sgpr_no_free_vgpr_ipra_tail_call() #0 {
+; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra_tail_call:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, child_function_ipra_tail_call@rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, child_function_ipra_tail_call@rel32@hi+12
+; GCN-NEXT:    s_setpc_b64 s[4:5]
   tail call void @child_function_ipra_tail_call()
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
index 8077a0b6adfb..34bc7523051f 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
@@ -1,12 +1,45 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck -check-prefixes=GCN,MUBUF %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck -check-prefix=MUBUF %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefix=FLATSCR %s
 
 ; Test that the VGPR spiller correctly switches to SGPR offsets when the
 ; instruction offset field would overflow, and that it accounts for memory
 ; swizzling.
 
-; GCN-LABEL: test_inst_offset_kernel
 define amdgpu_kernel void @test_inst_offset_kernel() {
+; MUBUF-LABEL: test_inst_offset_kernel:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_add_u32 s0, s0, s7
+; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4092 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4092 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_endpgm
+;
+; FLATSCR-LABEL: test_inst_offset_kernel:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_lo offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xffc
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xffc
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dword off, v0, vcc_hi offset:8
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_endpgm
 entry:
   ; Occupy 4092 bytes of scratch, so the offset of the spill of %a just fits in
   ; the instruction offset field.
@@ -14,8 +47,8 @@ entry:
   %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
 
   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4092 ; 4-byte Folded Spill
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s{{[0-9]+}} ; 4-byte Folded Spill
+
+
   %a = load volatile i32, i32 addrspace(5)* %aptr
 
   ; Force %a to spill.
@@ -27,8 +60,42 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_sgpr_offset_kernel
 define amdgpu_kernel void @test_sgpr_offset_kernel() {
+; MUBUF-LABEL: test_sgpr_offset_kernel:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_add_u32 s0, s0, s7
+; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, 0x40000
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_mov_b32 s4, 0x40000
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_endpgm
+;
+; FLATSCR-LABEL: test_sgpr_offset_kernel:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_lo offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0x1000
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_movk_i32 s0, 0x1000
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dword off, v0, vcc_hi offset:8
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_endpgm
 entry:
   ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
   ; fit in the instruction, and has to live in the SGPR offset.
@@ -37,12 +104,7 @@ entry:
 
   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
   ; 0x40000 / 64 = 4096 (for wave64)
-  ; MUBUF:   s_mov_b32 s4, 0x40000
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
-  ; FLATSCR: s_movk_i32 s2, 0x1000
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s2 ; 4-byte Folded Spill
   %a = load volatile i32, i32 addrspace(5)* %aptr
-
   ; Force %a to spill
   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
 
@@ -52,11 +114,50 @@ entry:
   ret void
 }
 
-; FIXME: If we fail to scavenge an SGPR in a kernel we don't have a stack
-; pointer to temporarily update, so we just crash.
-
-; GCN-LABEL: test_sgpr_offset_function_scavenge_fail_func
 define void @test_sgpr_offset_function_scavenge_fail_func() #2 {
+; MUBUF-LABEL: test_sgpr_offset_function_scavenge_fail_func:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x1004
+; MUBUF-NEXT:    buffer_store_dword v0, v1, s[0:3], s32 offen ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x1004
+; MUBUF-NEXT:    buffer_load_dword v0, v1, s[0:3], s32 offen ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_sgpr_offset_function_scavenge_fail_func:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_add_i32 s8, s32, 0x1004
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s8 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_add_i32 s8, s32, 0x1004
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s8 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
 entry:
   ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
   ; fit in the instruction, and has to live in the SGPR offset.
@@ -77,12 +178,6 @@ entry:
 
   ; 0x40000 / 64 = 4096 (for wave64)
   %a = load volatile i32, i32 addrspace(5)* %aptr
-
-  ; MUBUF:   v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004
-  ; MUBUF-NEXT: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], s32 offen ; 4-byte Folded Spill
-
-; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1004
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
   call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a)
 
   %asm = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=s,=s,=s,=s,=s,=s,=s,=s"()
@@ -96,18 +191,58 @@ entry:
   %asm7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm, 7
 
   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
-
-  ; MUBUF:   v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004
-  ; MUBUF-NEXT: buffer_load_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], s32 offen ; 4-byte Folded Reload
-  ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1004
-  ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload
-
    ; Force %a to spill with no free SGPRs
   call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0, i32 %asm1, i32 %asm2, i32 %asm3, i32 %asm4, i32 %asm5, i32 %asm6, i32 %asm7, i32 %a)
   ret void
 }
 
 define amdgpu_kernel void @test_sgpr_offset_function_scavenge_fail_kernel() #3 {
+; MUBUF-LABEL: test_sgpr_offset_function_scavenge_fail_kernel:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_add_u32 s0, s0, s7
+; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x1004
+; MUBUF-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x1004
+; MUBUF-NEXT:    buffer_load_dword v0, v1, s[0:3], 0 offen ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_endpgm
+;
+; FLATSCR-LABEL: test_sgpr_offset_function_scavenge_fail_kernel:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_hi offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s8, 0x1004
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s8 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_movk_i32 s8, 0x1004
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s8 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
 entry:
   ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
   ; fit in the instruction, and has to live in the SGPR offset.
@@ -128,12 +263,6 @@ entry:
 
   ; 0x40000 / 64 = 4096 (for wave64)
   %a = load volatile i32, i32 addrspace(5)* %aptr
-
-  ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004
-  ; MUBUF: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], 0 offen ; 4-byte Folded Spill
-
-  ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0x1004
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
   call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a)
 
   %asm = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=s,=s,=s,=s,=s,=s,=s,=s"()
@@ -147,19 +276,57 @@ entry:
   %asm7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm, 7
 
   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
-
-  ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004
-  ; MUBUF: buffer_load_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], 0 offen ; 4-byte Folded Reload
-  ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0x1004
-  ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload
-
    ; Force %a to spill with no free SGPRs
   call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0, i32 %asm1, i32 %asm2, i32 %asm3, i32 %asm4, i32 %asm5, i32 %asm6, i32 %asm7, i32 %a)
   ret void
 }
 
-; GCN-LABEL: test_sgpr_offset_subregs_kernel
 define amdgpu_kernel void @test_sgpr_offset_subregs_kernel() {
+; MUBUF-LABEL: test_sgpr_offset_subregs_kernel:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_add_u32 s0, s0, s7
+; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:12 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:16 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4088 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:4092 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4088 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_nop 0
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:4092 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; v[0:1]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_endpgm
+;
+; FLATSCR-LABEL: test_sgpr_offset_subregs_kernel:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, vcc_lo offset:12 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xff8
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_hi offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xff8
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; v[0:1]
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
 entry:
   ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a
   ; still fits below offset 4096 (4088 + 8 - 4 = 4092), and can be placed in
@@ -167,11 +334,6 @@ entry:
   %alloca = alloca i8, i32 4084, align 4, addrspace(5)
   %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
-
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4088 ; 4-byte Folded Spill
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4092 ; 4-byte Folded Spill
-  ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0xff8
-  ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], [[SOFF]]          ; 8-byte Folded Spill
   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
 
@@ -188,8 +350,54 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_inst_offset_subregs_kernel
 define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
+; MUBUF-LABEL: test_inst_offset_subregs_kernel:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_add_u32 s0, s0, s7
+; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:12 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:16 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, 0x3ff00
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, 0x3ff00
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_nop 0
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; v[0:1]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_endpgm
+;
+; FLATSCR-LABEL: test_inst_offset_subregs_kernel:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, vcc_lo offset:12 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xffc
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_hi offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xffc
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; v[0:1]
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
 entry:
   ; Occupy 4092 bytes of scratch, so that the spill of the last subreg of %a
   ; does not fit below offset 4096 (4092 + 8 - 4 = 4096), and has to live
@@ -199,11 +407,6 @@ entry:
   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
 
   ; 0x3ff00 / 64 = 4092 (for wave64)
-  ; MUBUF:   s_mov_b32 s4, 0x3ff00
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill
-  ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0xffc
-  ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], [[SOFF]]          ; 8-byte Folded Spill
   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
 
@@ -220,8 +423,34 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_inst_offset_function
 define void @test_inst_offset_function() {
+; MUBUF-LABEL: test_inst_offset_function:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4088 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4088 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_inst_offset_function:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:4088 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:4088 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:4
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
 entry:
   ; Occupy enough bytes of scratch, so the offset of the spill of %a
   ; just fits in the instruction offset field when the emergency stack
@@ -231,8 +460,8 @@ entry:
   %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
 
   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
+
+
   %a = load volatile i32, i32 addrspace(5)* %aptr
 
   ; Force %a to spill.
@@ -244,8 +473,38 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_sgpr_offset_function
 define void @test_sgpr_offset_function() {
+; MUBUF-LABEL: test_sgpr_offset_function:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_add_i32 s4, s32, 0x40100
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_add_i32 s4, s32, 0x40100
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_sgpr_offset_function:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_add_i32 s0, s32, 0x1004
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_add_i32 s0, s32, 0x1004
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:8
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
 entry:
   ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
   ; fit in the instruction, and has to live in the SGPR offset.
@@ -254,10 +513,6 @@ entry:
 
   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
   ; 0x40000 / 64 = 4096 (for wave64)
-  ; MUBUF:   s_add_i32 s4, s32, 0x40100
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
-  ; FLATSCR: s_add_i32 s0, s32, 0x1004
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s0 ; 4-byte Folded Spill
   %a = load volatile i32, i32 addrspace(5)* %aptr
 
   ; Force %a to spill
@@ -269,24 +524,57 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_sgpr_offset_subregs_function
 define void @test_sgpr_offset_subregs_function() {
+; MUBUF-LABEL: test_sgpr_offset_subregs_function:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4084 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4088 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4084 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_nop 0
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4088 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; v[0:1]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_sgpr_offset_subregs_function:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s32 offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s32 offset:4084 ; 8-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s32 offset:4084 ; 8-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; v[0:1]
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
 entry:
   ; We want to test the spill of the last subreg of %a is the highest
   ; valid value for the immediate offset. We enable the emergency
   ; stack slot for large frames, so it's hard to get the frame layout
   ; exactly as we want to test it.
-  ;
   ; Occupy 4084 bytes of scratch, so that the spill of the last subreg of %a
   ; still fits below offset 4096 (4084 + 8 - 4 = 4092), and can be placed in
   ; the instruction offset field.
   %alloca = alloca i8, i32 4084, align 4, addrspace(5)
   %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
-
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4084 ; 4-byte Folded Spill
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
-  ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4084 ; 8-byte Folded Spill
   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
 
@@ -303,8 +591,48 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_inst_offset_subregs_function
 define void @test_inst_offset_subregs_function() {
+; MUBUF-LABEL: test_inst_offset_subregs_function:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_add_i32 s4, s32, 0x3ff00
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_add_i32 s4, s32, 0x3ff00
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_nop 0
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; v[0:1]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_inst_offset_subregs_function:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s32 offset:12 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s32 offset:4092 ; 8-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s32 offset:4092 ; 8-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; v[0:1]
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
 entry:
   ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a
   ; does not fit below offset 4096 (4088 + 4 + 8 - 4 = 4096), and has to live
@@ -314,10 +642,6 @@ entry:
   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
 
   ; 0x3ff00 / 64 = 4092 (for wave64)
-  ; MUBUF: s_add_i32 s4, s32, 0x3ff00
-  ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
-  ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill
-  ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4092 ; 8-byte Folded Spill
   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
 

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index e0fc1e19b167..bdeb97cede4c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=verde -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck -check-prefixes=CHECK,GFX6 %s
 ; RUN: llc -sgpr-regalloc=basic -vgpr-regalloc=basic -march=amdgcn -mcpu=tonga -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck --check-prefix=CHECK %s
 ; RUN: llc -march=amdgcn -mattr=-xnack,+enable-flat-scratch -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck -check-prefixes=CHECK,GFX9-FLATSCR,FLATSCR %s
@@ -12,17 +13,10044 @@
 ; mechanism works even when many spills happen.
 
 ; Just test that it compiles successfully.
-; CHECK-LABEL: test
-
-; GFX9-FLATSCR: s_mov_b32 [[SOFF1:s[0-9]+]], 4{{$}}
-; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill
-; GFX9-FLATSCR: ;;#ASMSTART
-; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x1{{[0-9a-f]+}}{{$}}
-; GFX9-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload
-
-; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:{{[0-9]+}} ; 16-byte Folded Spill
-; GFX10-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, off offset:{{[0-9]+}} ; 16-byte Folded Reload
 define amdgpu_kernel void @test(<1280 x i32> addrspace(1)* %out, <1280 x i32> addrspace(1)* %in) {
+; GFX6-LABEL: test:
+; GFX6:       ; %bb.0: ; %entry
+; GFX6-NEXT:    s_mov_b32 s44, SCRATCH_RSRC_DWORD0
+; GFX6-NEXT:    s_mov_b32 s45, SCRATCH_RSRC_DWORD1
+; GFX6-NEXT:    s_mov_b32 s46, -1
+; GFX6-NEXT:    s_mov_b32 s47, 0xe8f000
+; GFX6-NEXT:    s_add_u32 s44, s44, s3
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX6-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
+; GFX6-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, -1, v0
+; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 13, v0
+; GFX6-NEXT:    s_mov_b32 s18, 0
+; GFX6-NEXT:    s_mov_b32 s19, 0xf000
+; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s2, v5
+; GFX6-NEXT:    v_mov_b32_e32 v1, s3
+; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX6-NEXT:    s_movk_i32 s4, 0x80
+; GFX6-NEXT:    s_mov_b32 s5, s18
+; GFX6-NEXT:    s_mov_b64 s[6:7], s[18:19]
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:3968
+; GFX6-NEXT:    s_addc_u32 s45, s45, 0
+; GFX6-NEXT:    s_movk_i32 s8, 0x100
+; GFX6-NEXT:    s_mov_b32 s9, s18
+; GFX6-NEXT:    s_mov_b64 s[10:11], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s12, 0x180
+; GFX6-NEXT:    s_mov_b32 s13, s18
+; GFX6-NEXT:    s_mov_b64 s[14:15], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s20, 0x200
+; GFX6-NEXT:    s_mov_b32 s21, s18
+; GFX6-NEXT:    s_mov_b64 s[22:23], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s24, 0x280
+; GFX6-NEXT:    s_mov_b32 s25, s18
+; GFX6-NEXT:    s_mov_b64 s[26:27], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s28, 0x300
+; GFX6-NEXT:    s_mov_b32 s29, s18
+; GFX6-NEXT:    s_mov_b64 s[30:31], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s36, 0x380
+; GFX6-NEXT:    s_mov_b32 s37, s18
+; GFX6-NEXT:    s_mov_b64 s[38:39], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s40, 0x400
+; GFX6-NEXT:    s_mov_b32 s41, s18
+; GFX6-NEXT:    s_mov_b64 s[42:43], s[18:19]
+; GFX6-NEXT:    s_mov_b64 s[16:17], s[2:3]
+; GFX6-NEXT:    v_mov_b32_e32 v6, 0
+; GFX6-NEXT:    s_mov_b32 s2, 0x3fd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1268 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1272 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1276 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1280 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1300 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1304 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1308 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1312 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1332 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1336 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1340 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1344 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1364 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1368 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1372 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1376 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1396 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1400 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1404 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1408 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1428 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1432 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1436 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1440 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1460 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1464 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1468 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1472 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1492 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1496 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1500 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1504 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1556 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1560 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1564 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1568 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1588 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1592 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1596 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1600 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1620 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1624 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1628 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1632 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1652 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1656 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1660 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1664 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1684 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1688 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1692 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1696 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1716 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1720 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1724 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1728 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1748 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1752 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1756 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1760 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1780 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1784 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1788 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1792 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1860 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1864 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1868 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1872 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1892 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1896 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1900 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1904 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1924 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1928 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1932 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1936 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1956 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1960 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1964 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1968 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1988 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1992 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1996 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2000 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2020 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2024 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2028 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2032 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2052 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2056 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2060 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2064 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2084 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2088 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2092 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2096 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2148 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2152 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2156 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2160 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2180 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2184 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2188 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2192 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2212 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2216 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2220 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2224 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2244 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2248 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2252 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2256 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2276 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2280 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2284 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2288 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2308 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2312 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2316 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2320 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2340 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2344 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2348 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2352 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2372 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2376 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2380 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2384 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2452 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2456 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2460 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2464 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2484 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2488 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2492 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2496 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2516 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2520 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2524 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2528 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2548 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2552 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2556 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2560 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2580 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2584 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2588 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2592 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2612 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2616 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2620 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2624 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2644 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2648 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2652 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2656 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2676 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2680 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2684 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2688 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2740 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2744 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2748 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2752 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2772 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2776 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2780 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2784 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2804 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2808 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2812 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2816 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2836 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2840 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2844 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2848 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2868 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2872 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2876 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2880 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2900 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2904 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2908 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2912 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2932 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2936 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2940 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2944 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2964 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2968 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2972 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2976 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3044 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3048 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3052 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3056 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3076 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3080 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3084 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3088 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3108 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3112 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3116 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3120 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3140 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3144 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3148 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3152 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3172 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3176 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3180 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3184 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3204 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3208 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3212 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3216 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3236 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3240 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3244 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3248 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3268 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3272 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3276 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3280 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3332 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3336 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3340 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3344 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3364 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3368 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3372 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3376 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3396 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3400 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3404 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3408 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3428 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3432 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3436 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3440 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3460 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3464 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3468 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3472 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3492 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3496 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3500 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3504 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3524 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3528 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3532 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3536 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[40:43], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(3)
+; GFX6-NEXT:    v_add_i32_e32 v7, vcc, s0, v5
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3556 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3560 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3564 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3568 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:16 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:16
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:20 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:24 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:28 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:32 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:32
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:36 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:40 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:44 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:48 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:48
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:52 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:56 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:60 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:64 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:64
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:68 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:72 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:76 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:80 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:80
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:84 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:88 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:92 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:96 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:96
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:100 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:104 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:108 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:112 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:112
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:116 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:120 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:124 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:128 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:128
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:132 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:136 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:140 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:144 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:144
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:148 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:152 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:156 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:160 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:160
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:164 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:168 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:172 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:176 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:176
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:180 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:184 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:188 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:192 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:192
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:196 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:200 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:204 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:208 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:208
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:212 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:216 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:220 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:224 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:224
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:228 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:232 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:236 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:240 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:240
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:244 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:248 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:252 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:256 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:256
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:260 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:264 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:268 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:272 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:272
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:276 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:280 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:284 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:288 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:288
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:292 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:296 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:300 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:304 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:304
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:308 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:312 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:316 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:320 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:320
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:324 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:328 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:332 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:336 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:336
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:340 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:344 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:348 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:352 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:352
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:356 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:360 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:364 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:368 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:368
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:372 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:376 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:380 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:384 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:384
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:388 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:392 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:396 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:400 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:404 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:408 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:412 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:416 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:416
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:420 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:424 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:428 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:432 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:432
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:436 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:440 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:444 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:448 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:448
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:452 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:456 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:460 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:464 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:464
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:468 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:472 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:476 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:480 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:480
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:484 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:488 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:492 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:496 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:496
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:500 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:504 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:508 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:512 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:512
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:516 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:520 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:524 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:528 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:528
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:532 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:536 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:540 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:544 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:544
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:548 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:552 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:556 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:560 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:560
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:564 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:568 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:572 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:576 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:576
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:580 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:584 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:588 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:592 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:592
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:596 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:600 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:604 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:608 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:608
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:612 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:616 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:620 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:624 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:624
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:628 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:632 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:636 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:640 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:640
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:644 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:648 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:652 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:656 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:656
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:660 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:664 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:668 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:672 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:672
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:676 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:680 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:684 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:688 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:688
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:692 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:696 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:700 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:704 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:704
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:708 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:712 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:716 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:720 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:720
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:724 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:728 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:732 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:736 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:736
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:740 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:744 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:748 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:752 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:752
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:756 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:760 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:764 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:768 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:768
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:772 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:776 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:780 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:784 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:784
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:788 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:792 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:796 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:800 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:804 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:808 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:812 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:816 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:816
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:820 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:824 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:828 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:832 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:832
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:836 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:840 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:844 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:848 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:848
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:852 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:856 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:860 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:864 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:864
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:868 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:872 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:876 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:880 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:880
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:884 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:888 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:892 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:896 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:896
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:900 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:904 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:908 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:912 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:912
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:916 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:920 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:924 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:928 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:928
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:932 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:936 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:940 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:944 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:944
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:948 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:952 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:956 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:960 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:960
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:964 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:968 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:972 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:976 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:976
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:980 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:984 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:988 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:992 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:992
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:996 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1000 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1004 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1008 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1008
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1012 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1016 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1020 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1024 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1024
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1028 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1032 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1036 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1040 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1040
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1044 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1048 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1052 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1056 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1056
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1060 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1064 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1068 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1072 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1072
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1076 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1080 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1084 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1088 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1088
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1092 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1096 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1100 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1104 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1104
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1108 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1112 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1116 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1120 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1120
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1124 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1128 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1132 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1136 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1136
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1140 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1144 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1148 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1152 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1152
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1156 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1160 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1164 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1168 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1168
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1172 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1176 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1180 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1184 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1184
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1188 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1192 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1196 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1200 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1200
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1204 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1208 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1212 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1216 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1216
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1220 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1224 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1228 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1232 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1232
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1236 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1240 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1244 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1248 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1248
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1252 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1256 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1260 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1264 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1264
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1284 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1288 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1292 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1296 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1280
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1316 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1320 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1324 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1328 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1296
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1348 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1352 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1356 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1360 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1312
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1380 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1384 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1388 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1392 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1328
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1412 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1416 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1420 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1424 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1344
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1444 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1448 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1452 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1456 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1360
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1476 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1480 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1484 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1488 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1376
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1508 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1512 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1516 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1520 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1392
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1524 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1528 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1532 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1536 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1408
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1540 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1544 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1548 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1552 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1424
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1572 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1576 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1580 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1584 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1440
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1604 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1608 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1612 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1616 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1456
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1636 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1640 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1644 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1648 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1472
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1668 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1672 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1676 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1680 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1488
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1700 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1704 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1708 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1712 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1504
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1732 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1736 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1740 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1744 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1520
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1764 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1768 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1772 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1776 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1536
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1796 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1800 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1804 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1808 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1552
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1812 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1816 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1820 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1824 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1568
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1828 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1832 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1836 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1840 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1584
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1844 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1848 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1852 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1856 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1600
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1876 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1880 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1884 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1888 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1616
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1908 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1912 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1916 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1920 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1632
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1940 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1944 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1948 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1952 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1648
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1972 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1976 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1980 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1984 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1664
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2004 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2008 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2012 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2016 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1680
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2036 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2040 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2044 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2048 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1696
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2068 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2072 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2076 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2080 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1712
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2100 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2104 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2108 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2112 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1728
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2116 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2120 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2124 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2128 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1744
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2132 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2136 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2140 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2144 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1760
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2164 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2168 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2172 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2176 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1776
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2196 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2200 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2204 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2208 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1792
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2228 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2232 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2236 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2240 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1808
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2260 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2264 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2268 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2272 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1824
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2292 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2296 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2300 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2304 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1840
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2324 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2328 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2332 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2336 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1856
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2356 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2360 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2364 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2368 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1872
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2388 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2392 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2396 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2400 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1888
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2404 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2408 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2412 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2416 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1904
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2420 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2424 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2428 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2432 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1920
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2436 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2440 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2444 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2448 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1936
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2468 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2472 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2476 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2480 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1952
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2500 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2504 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2508 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2512 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2532 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2536 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2540 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2544 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2564 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2568 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2572 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2576 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2596 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2600 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2604 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2608 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2628 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2632 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2636 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2640 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2660 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2664 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2668 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2672 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2692 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2696 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2700 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2704 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2708 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2712 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2716 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2720 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2724 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2728 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2732 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2736 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2096
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2756 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2760 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2764 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2768 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2112
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2788 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2792 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2796 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2800 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2128
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2820 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2824 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2828 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2832 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2144
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2852 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2856 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2860 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2864 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2160
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2884 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2888 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2892 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2896 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2176
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2916 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2920 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2924 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2928 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2192
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2948 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2952 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2956 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2960 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2208
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2980 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2984 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2988 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2992 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2224
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2996 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3000 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3004 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3008 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2240
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3012 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3016 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3020 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3024 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2256
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3028 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3032 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3036 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3040 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2272
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3060 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3064 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3068 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3072 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2288
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3092 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3096 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3100 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3104 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2304
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3124 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3128 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3132 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3136 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2320
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3156 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3160 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3164 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3168 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2336
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3188 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3192 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3196 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3200 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2352
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3220 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3224 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3228 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3232 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2368
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3252 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3256 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3260 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3264 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2384
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3284 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3288 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3292 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3296 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3300 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3304 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3308 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3312 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2416
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3316 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3320 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3324 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3328 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2432
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3348 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3352 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3356 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3360 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2448
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3380 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3384 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3388 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3392 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2464
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3412 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3416 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3420 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3424 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2480
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3444 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3448 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3452 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3456 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2496
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3476 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3480 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3484 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3488 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2512
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3508 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3512 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3516 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3520 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2528
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3540 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3544 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3548 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3552 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2544
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3572 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3576 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3580 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3584 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2560
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3588 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3592 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3596 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3600 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2576
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3604 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3608 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3612 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3616 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2592
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3620 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3624 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3628 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3632 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2608
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3636 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3640 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3644 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3648 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2624
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3652 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3656 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3660 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3664 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2640
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3668 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3672 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3676 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3680 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2656
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3684 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3688 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3692 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3696 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2672
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3700 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3704 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3708 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3712 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2688
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3716 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3720 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3724 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3728 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2704
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3732 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3736 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3740 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3744 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2720
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3748 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3752 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3756 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3760 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2736
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3764 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3768 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3772 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3776 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2752
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3780 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3784 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3788 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3792 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2768
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3796 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3800 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3804 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3808 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2784
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3812 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3816 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3820 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3824 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3828 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3832 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3836 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3840 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2816
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3844 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3848 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3852 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3856 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2832
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3860 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3864 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3868 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3872 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2848
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3876 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3880 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3884 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3888 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2864
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3892 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3896 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3900 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3904 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2880
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3908 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3912 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3916 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3920 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2896
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3924 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3928 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3932 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3936 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2912
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3940 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3944 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3948 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3952 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2928
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3956 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3960 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3964 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3968 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2944
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3972 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3976 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3980 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3984 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2960
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3988 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3992 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3996 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4000 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2976
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4004 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:4008 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:4012 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4016 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2992
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4020 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:4024 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:4028 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4032 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3008
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4036 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:4040 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:4044 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4048 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3024
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4052 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:4056 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:4060 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4064 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3040
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4068 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:4072 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:4076 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4080 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3056
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3072
+; GFX6-NEXT:    s_mov_b32 s2, 0x40100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3088
+; GFX6-NEXT:    s_mov_b32 s2, 0x40500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3104
+; GFX6-NEXT:    s_mov_b32 s2, 0x40900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3120
+; GFX6-NEXT:    s_mov_b32 s2, 0x40d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3136
+; GFX6-NEXT:    s_mov_b32 s2, 0x41100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3152
+; GFX6-NEXT:    s_mov_b32 s2, 0x41500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3168
+; GFX6-NEXT:    s_mov_b32 s2, 0x41900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3184
+; GFX6-NEXT:    s_mov_b32 s2, 0x41d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3200
+; GFX6-NEXT:    s_mov_b32 s2, 0x42100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3216
+; GFX6-NEXT:    s_mov_b32 s2, 0x42500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3232
+; GFX6-NEXT:    s_mov_b32 s2, 0x42900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3248
+; GFX6-NEXT:    s_mov_b32 s2, 0x42d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3264
+; GFX6-NEXT:    s_mov_b32 s2, 0x43100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3280
+; GFX6-NEXT:    s_mov_b32 s2, 0x43500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3296
+; GFX6-NEXT:    s_mov_b32 s2, 0x43900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3312
+; GFX6-NEXT:    s_mov_b32 s2, 0x43d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3328
+; GFX6-NEXT:    s_mov_b32 s2, 0x44100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3344
+; GFX6-NEXT:    s_mov_b32 s2, 0x44500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3360
+; GFX6-NEXT:    s_mov_b32 s2, 0x44900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3376
+; GFX6-NEXT:    s_mov_b32 s2, 0x44d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3392
+; GFX6-NEXT:    s_mov_b32 s2, 0x45100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3408
+; GFX6-NEXT:    s_mov_b32 s2, 0x45500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3424
+; GFX6-NEXT:    s_mov_b32 s2, 0x45900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3440
+; GFX6-NEXT:    s_mov_b32 s2, 0x45d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3456
+; GFX6-NEXT:    s_mov_b32 s2, 0x46100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3472
+; GFX6-NEXT:    s_mov_b32 s2, 0x46500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3488
+; GFX6-NEXT:    s_mov_b32 s2, 0x46900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3504
+; GFX6-NEXT:    s_mov_b32 s2, 0x46d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3520
+; GFX6-NEXT:    s_mov_b32 s2, 0x47100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3536
+; GFX6-NEXT:    s_mov_b32 s2, 0x47500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3552
+; GFX6-NEXT:    s_mov_b32 s2, 0x47900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3568
+; GFX6-NEXT:    s_mov_b32 s2, 0x47d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3584
+; GFX6-NEXT:    s_mov_b32 s2, 0x48100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3600
+; GFX6-NEXT:    s_mov_b32 s2, 0x48500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3616
+; GFX6-NEXT:    s_mov_b32 s2, 0x48900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3632
+; GFX6-NEXT:    s_mov_b32 s2, 0x48d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3648
+; GFX6-NEXT:    s_mov_b32 s2, 0x49100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3664
+; GFX6-NEXT:    s_mov_b32 s2, 0x49500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3680
+; GFX6-NEXT:    s_mov_b32 s2, 0x49900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3696
+; GFX6-NEXT:    s_mov_b32 s2, 0x49d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3712
+; GFX6-NEXT:    s_mov_b32 s2, 0x4a100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3728
+; GFX6-NEXT:    s_mov_b32 s2, 0x4a500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3744
+; GFX6-NEXT:    s_mov_b32 s2, 0x4a900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3760
+; GFX6-NEXT:    s_mov_b32 s2, 0x4ad00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3776
+; GFX6-NEXT:    s_mov_b32 s2, 0x4b100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3792
+; GFX6-NEXT:    s_mov_b32 s2, 0x4b500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3808
+; GFX6-NEXT:    s_mov_b32 s2, 0x4b900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3824
+; GFX6-NEXT:    s_mov_b32 s2, 0x4bd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3840
+; GFX6-NEXT:    s_mov_b32 s2, 0x4c100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3856
+; GFX6-NEXT:    s_mov_b32 s2, 0x4c500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3872
+; GFX6-NEXT:    s_mov_b32 s2, 0x4c900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3888
+; GFX6-NEXT:    s_mov_b32 s2, 0x4cd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3904
+; GFX6-NEXT:    s_mov_b32 s2, 0x4d100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3920
+; GFX6-NEXT:    s_mov_b32 s2, 0x4d500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3936
+; GFX6-NEXT:    s_mov_b32 s2, 0x4d900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3952
+; GFX6-NEXT:    s_mov_b32 s2, 0x4dd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3968
+; GFX6-NEXT:    s_mov_b32 s2, 0x4e100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3984
+; GFX6-NEXT:    s_mov_b32 s2, 0x4e500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4000
+; GFX6-NEXT:    s_mov_b32 s2, 0x4e900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4016
+; GFX6-NEXT:    s_mov_b32 s2, 0x4ed00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4032
+; GFX6-NEXT:    s_mov_b32 s2, 0x4f100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4048
+; GFX6-NEXT:    s_mov_b32 s2, 0x4f500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4064
+; GFX6-NEXT:    s_mov_b32 s2, 0x4f900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4080
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3556 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3560 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3564 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3568 ; 4-byte Folded Reload
+; GFX6-NEXT:    v_mov_b32_e32 v4, s1
+; GFX6-NEXT:    v_addc_u32_e32 v8, vcc, 0, v4, vcc
+; GFX6-NEXT:    s_mov_b64 s[2:3], s[18:19]
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3524 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3528 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3532 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3536 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3492 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3496 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3500 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3504 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3460 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3464 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3468 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3472 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3428 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3432 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3436 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3440 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3396 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3400 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3404 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3408 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3364 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3368 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3372 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3376 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3332 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3336 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3340 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3344 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3268 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3272 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3276 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3280 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3236 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3240 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3244 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3248 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3204 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3208 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3212 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3216 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3172 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3176 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3180 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3184 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3140 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3144 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3148 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3152 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3108 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3112 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3116 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3120 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3076 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3080 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3084 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3088 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3044 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3048 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3052 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3056 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2964 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2968 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2972 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2976 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2932 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2936 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2940 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2944 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2900 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2904 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2908 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2912 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2868 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2872 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2876 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2880 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2836 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2840 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2844 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2848 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2804 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2808 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2812 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2816 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2772 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2776 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2780 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2784 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2740 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2744 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2748 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2752 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2676 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2680 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2684 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2688 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2644 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2648 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2652 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2656 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2612 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2616 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2620 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2624 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2580 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2584 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2588 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2592 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2548 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2552 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2556 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2560 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2516 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2520 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2524 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2528 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2484 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2488 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2492 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2496 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2452 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2456 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2460 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2464 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2372 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2376 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2380 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2384 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2340 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2344 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2348 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2352 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2308 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2312 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2316 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2320 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2276 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2280 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2284 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2288 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2244 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2248 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2252 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2256 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2212 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2216 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2220 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2224 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2180 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2184 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2188 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2192 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2148 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2152 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2156 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2160 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2084 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2088 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2092 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2096 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2052 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2056 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2060 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2064 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2020 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2024 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2028 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2032 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1988 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1992 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1996 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2000 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1956 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1960 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1964 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1968 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1924 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1928 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1932 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1936 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1892 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1896 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1900 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1904 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1860 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1864 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1868 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1872 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1780 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1784 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1788 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1792 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1748 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1752 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1756 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1760 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1716 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1720 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1724 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1728 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1684 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1688 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1692 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1696 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1652 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1656 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1660 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1664 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1620 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1624 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1628 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1632 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1588 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1592 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1596 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1600 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1556 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1560 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1564 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1568 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1492 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1496 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1500 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1504 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1460 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1464 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1468 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1472 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1428 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1432 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1436 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1440 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1396 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1400 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1404 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1408 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1364 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1368 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1372 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1376 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1332 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1336 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1340 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1344 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1300 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1304 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1308 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1312 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1268 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1272 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1276 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1280 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:3968
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4080
+; GFX6-NEXT:    s_mov_b32 s4, 0x4f900
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4f500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4f100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4ed00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4e900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4e500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4e100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4dd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4d900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3952
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4d500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3936
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4d100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3920
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4cd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3904
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4c900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3888
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4c500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3872
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4c100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3856
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4bd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3840
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4b900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3824
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4b500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3808
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4b100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3792
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4ad00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3776
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4a900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3760
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4a500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3744
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4a100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3728
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x49d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3712
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x49900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3696
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x49500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3680
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x49100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3664
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x48d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3648
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x48900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3632
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x48500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3616
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x48100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3600
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x47d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3584
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x47900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3568
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x47500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3552
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x47100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3536
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x46d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3520
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x46900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3504
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x46500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3488
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x46100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3472
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x45d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3456
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x45900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3440
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x45500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3424
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x45100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3408
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x44d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3392
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x44900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3376
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x44500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3360
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x44100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3344
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x43d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3328
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x43900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3312
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x43500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3296
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x43100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3280
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x42d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3264
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x42900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3248
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x42500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3232
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x42100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3216
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x41d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3200
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x41900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3184
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x41500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3168
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x41100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3152
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x40d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3136
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x40900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3120
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x40500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3104
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x40100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3088
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x3fd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3072
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3056
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4068 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:4072 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:4076 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4080 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3040
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4052 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:4056 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:4060 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4064 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3024
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4036 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:4040 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:4044 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4048 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3008
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4020 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:4024 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:4028 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4032 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2992
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4004 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:4008 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:4012 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4016 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2976
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3988 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3992 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3996 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4000 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2960
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3972 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3976 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3980 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3984 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2944
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3956 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3960 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3964 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3968 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2928
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3940 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3944 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3948 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3952 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2912
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3924 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3928 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3932 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3936 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2896
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3908 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3912 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3916 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3920 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2880
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3892 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3896 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3900 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3904 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2864
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3876 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3880 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3884 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3888 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2848
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3860 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3864 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3868 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3872 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2832
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3844 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3848 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3852 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3856 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2816
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3828 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3832 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3836 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3840 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2800
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3812 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3816 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3820 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3824 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2784
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3796 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3800 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3804 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3808 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2768
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3780 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3784 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3788 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3792 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2752
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3764 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3768 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3772 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3776 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2736
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3748 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3752 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3756 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3760 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2720
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3732 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3736 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3740 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3744 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2704
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3716 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3720 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3724 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3728 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2688
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3700 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3704 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3708 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3712 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2672
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3684 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3688 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3692 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3696 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2656
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3668 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3672 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3676 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3680 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2640
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3652 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3656 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3660 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3664 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2624
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3636 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3640 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3644 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3648 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2608
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3620 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3624 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3628 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3632 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2592
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3604 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3608 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3612 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3616 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2576
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3588 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3592 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3596 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3600 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2560
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3572 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3576 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3580 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3584 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2544
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3540 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3544 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3548 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3552 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2528
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3508 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3512 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3516 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3520 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2512
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3476 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3480 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3484 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3488 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2496
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3444 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3448 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3452 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3456 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2480
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3412 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3416 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3420 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3424 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2464
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3380 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3384 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3388 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3392 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2448
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3348 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3352 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3356 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3360 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2432
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3316 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3320 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3324 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3328 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2416
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3300 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3304 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3308 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3312 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2400
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3284 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3288 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3292 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3296 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2384
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3252 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3256 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3260 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3264 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2368
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3220 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3224 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3228 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3232 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2352
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3188 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3192 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3196 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3200 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2336
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3156 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3160 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3164 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3168 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2320
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3124 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3128 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3132 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3136 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2304
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3092 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3096 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3100 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3104 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2288
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3060 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3064 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3068 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3072 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2272
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3028 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3032 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3036 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3040 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2256
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3012 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3016 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3020 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3024 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2240
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2996 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3000 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3004 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3008 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2224
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2980 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2984 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2988 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2992 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2208
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2948 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2952 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2956 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2960 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2192
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2916 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2920 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2924 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2928 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2176
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2884 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2888 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2892 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2896 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2160
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2852 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2856 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2860 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2864 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2144
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2820 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2824 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2828 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2832 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2128
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2788 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2792 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2796 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2800 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2112
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2756 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2760 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2764 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2768 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2096
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2724 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2728 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2732 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2736 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2708 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2712 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2716 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2720 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2692 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2696 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2700 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2704 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2660 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2664 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2668 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2672 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2628 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2632 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2636 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2640 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2596 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2600 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2604 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2608 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2564 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2568 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2572 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2576 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2532 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2536 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2540 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2544 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2500 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2504 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2508 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2512 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1952
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2468 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2472 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2476 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2480 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1936
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2436 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2440 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2444 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2448 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1920
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2420 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2424 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2428 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2432 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1904
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2404 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2408 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2412 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2416 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1888
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2388 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2392 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2396 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2400 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1872
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2356 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2360 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2364 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2368 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1856
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2324 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2328 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2332 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2336 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1840
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2292 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2296 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2300 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2304 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1824
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2260 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2264 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2268 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2272 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1808
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2228 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2232 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2236 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2240 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1792
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2196 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2200 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2204 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2208 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1776
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2164 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2168 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2172 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2176 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1760
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2132 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2136 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2140 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2144 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1744
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2116 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2120 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2124 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2128 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1728
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2100 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2104 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2108 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2112 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1712
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2068 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2072 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2076 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2080 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1696
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2036 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2040 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2044 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2048 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1680
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2004 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2008 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2012 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2016 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1664
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1972 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1976 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1980 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1984 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1648
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1940 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1944 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1948 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1952 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1632
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1908 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1912 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1916 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1920 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1616
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1876 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1880 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1884 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1888 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1600
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1844 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1848 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1852 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1856 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1584
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1828 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1832 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1836 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1840 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1568
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1812 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1816 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1820 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1824 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1552
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1796 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1800 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1804 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1808 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1536
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1764 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1768 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1772 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1776 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1520
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1732 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1736 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1740 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1744 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1504
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1700 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1704 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1708 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1712 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1488
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1668 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1672 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1676 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1680 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1472
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1636 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1640 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1644 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1648 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1456
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1604 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1608 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1612 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1616 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1440
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1572 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1576 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1580 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1584 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1424
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1540 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1544 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1548 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1552 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1408
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1524 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1528 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1532 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1536 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1392
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1508 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1512 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1516 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1520 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1376
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1476 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1480 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1484 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1488 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1360
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1444 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1448 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1452 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1456 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1344
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1412 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1416 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1420 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1424 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1328
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1380 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1384 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1388 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1392 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1312
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1348 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1352 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1356 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1360 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1296
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1316 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1320 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1324 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1328 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1280
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1284 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1288 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1292 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1296 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1264
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1252 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1256 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1260 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1264 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1248
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1236 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1240 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1244 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1248 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1232
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1220 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1224 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1228 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1232 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1216
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1204 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1208 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1212 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1216 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1200
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1188 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1192 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1196 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1200 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1184
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1172 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1176 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1180 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1184 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1168
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1156 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1160 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1164 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1168 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1152
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1140 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1144 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1148 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1152 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1136
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1124 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1128 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1132 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1136 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1120
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1108 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1112 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1116 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1120 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1104
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1092 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1096 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1100 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1104 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1088
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1076 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1080 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1084 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1088 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1072
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1060 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1064 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1068 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1072 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1056
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1044 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1048 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1052 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1056 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1040
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1028 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1032 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1036 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1040 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1024
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1012 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1016 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1020 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1024 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1008
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:996 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1000 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1004 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1008 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:992
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:980 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:984 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:988 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:992 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:976
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:964 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:968 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:972 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:976 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:960
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:948 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:952 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:956 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:960 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:944
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:932 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:936 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:940 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:944 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:928
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:916 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:920 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:924 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:928 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:912
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:900 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:904 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:908 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:912 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:896
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:884 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:888 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:892 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:896 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:880
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:868 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:872 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:876 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:880 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:864
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:852 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:856 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:860 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:864 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:848
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:836 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:840 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:844 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:848 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:832
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:820 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:824 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:828 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:832 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:816
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:804 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:808 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:812 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:816 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:800
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:788 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:792 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:796 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:800 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:784
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:772 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:776 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:780 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:784 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:768
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:756 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:760 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:764 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:768 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:752
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:740 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:744 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:748 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:752 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:736
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:724 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:728 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:732 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:736 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:720
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:708 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:712 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:716 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:720 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:704
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:692 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:696 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:700 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:704 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:688
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:676 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:680 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:684 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:688 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:672
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:660 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:664 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:668 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:672 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:656
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:644 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:648 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:652 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:656 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:640
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:628 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:632 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:636 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:640 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:624
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:612 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:616 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:620 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:624 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:608
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:596 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:600 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:604 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:608 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:592
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:580 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:584 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:588 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:592 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:576
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:564 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:568 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:572 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:576 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:560
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:548 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:552 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:556 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:560 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:544
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:532 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:536 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:540 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:544 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:528
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:516 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:520 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:524 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:528 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:512
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:500 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:504 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:508 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:512 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:496
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:484 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:488 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:492 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:496 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:480
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:468 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:472 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:476 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:480 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:464
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:452 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:456 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:460 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:464 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:448
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:436 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:440 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:444 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:448 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:432
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:420 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:424 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:428 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:432 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:416
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:404 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:408 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:412 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:416 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:400
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:388 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:392 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:396 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:400 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:384
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:372 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:376 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:380 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:384 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:368
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:356 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:360 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:364 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:368 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:352
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:340 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:344 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:348 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:352 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:336
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:324 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:328 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:332 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:336 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:320
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:308 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:312 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:316 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:320 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:304
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:292 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:296 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:300 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:304 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:288
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:276 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:280 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:284 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:288 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:272
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:260 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:264 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:268 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:272 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:256
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:244 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:248 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:252 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:256 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:240
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:228 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:232 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:236 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:240 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:224
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:212 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:216 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:220 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:224 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:208
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:196 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:200 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:204 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:208 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:192
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:180 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:184 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:188 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:192 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:176
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:164 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:168 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:172 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:176 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:160
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:148 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:152 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:156 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:160 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:144
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:132 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:136 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:140 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:144 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:128
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:116 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:120 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:124 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:128 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:112
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:100 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:104 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:108 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:112 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:96
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:84 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:88 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:92 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:96 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:80
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:68 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:72 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:76 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:80 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:64
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:52 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:56 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:60 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:64 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:48
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:36 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:40 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:44 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:48 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:32
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:20 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:24 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:28 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:32 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:16
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:16 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64
+; GFX6-NEXT:    s_endpgm
+;
+; GFX9-FLATSCR-LABEL: test:
+; GFX9-FLATSCR:       ; %bb.0: ; %entry
+; GFX9-FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
+; GFX9-FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-FLATSCR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-FLATSCR-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
+; GFX9-FLATSCR-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v5, 13, v0
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x80
+; GFX9-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v2, vcc, s2, v5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, s3
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v0, vcc
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0x84
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x104
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x184
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x204
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x284
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x304
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x384
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 20
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 36
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 52
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x100
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0x94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0x180
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s5, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x114
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x124
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x134
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x144
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x154
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x164
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x174
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x200
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x194
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x280
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s7, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x214
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x224
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x234
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x244
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x254
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x264
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x274
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x300
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s8, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x294
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x380
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s9, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x314
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x324
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x334
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x344
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x354
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x364
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x374
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x400
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s10, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x394
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3]
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x404
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, s1
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:16
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x414
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:32
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x424
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:48
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x434
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:64
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x444
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:80
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x454
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:96
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x464
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:112
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x474
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:128
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x484
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:144
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x494
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:160
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:176
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:192
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:208
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:224
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:240
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:256
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x504
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:272
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x514
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:288
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x524
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:304
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x534
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:320
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x544
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:336
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x554
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:352
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x564
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:368
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x574
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:384
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x584
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:400
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x594
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:416
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:432
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:448
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:464
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:480
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:496
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:512
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x604
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:528
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x614
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:544
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x624
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:560
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x634
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:576
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x644
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:592
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x654
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:608
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x664
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:624
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x674
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:640
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x684
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:656
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x694
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:672
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:688
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:704
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:720
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:736
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:752
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:768
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x704
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:784
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x714
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:800
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x724
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:816
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x734
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:832
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x744
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:848
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x754
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:864
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x764
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:880
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x774
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:896
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x784
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:912
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x794
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:928
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:944
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:960
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:976
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:992
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x804
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x814
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x824
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x834
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x844
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x854
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x864
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x874
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x884
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x894
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x904
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x914
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x924
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x934
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x944
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x954
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x964
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x974
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x984
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x994
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xaa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xab4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xac4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xad4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xae4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xaf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xba4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2096
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2112
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2128
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2144
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2160
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2176
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2192
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2208
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xca4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2224
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2240
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2256
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2272
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xce4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2288
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2304
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2320
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2336
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2352
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2368
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2384
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2400
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2416
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2432
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2448
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2464
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xda4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2480
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2496
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2512
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2528
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xde4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2544
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2560
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2576
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2592
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2608
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2624
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2640
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2656
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2672
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2688
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2704
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2720
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xea4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2736
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xeb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2752
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xec4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2768
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xed4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2784
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xee4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2800
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xef4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2816
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2832
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2848
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2864
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2880
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2896
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2912
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2928
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2944
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2960
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2976
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2992
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3008
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3024
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3040
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3056
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xff4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3072
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1004
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3088
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1014
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3104
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1024
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3120
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1034
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3136
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1044
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3152
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1054
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3168
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1064
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3184
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1074
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3200
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1084
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3216
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1094
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3232
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3248
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3264
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3280
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3296
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3312
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3328
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1104
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3344
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1114
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3360
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1124
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3376
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1134
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3392
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1144
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3408
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1154
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3424
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1164
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3440
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1174
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3456
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1184
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3472
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1194
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3488
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3504
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3520
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3536
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3552
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3568
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3584
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1204
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3600
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1214
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3616
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1224
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3632
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1234
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3648
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1244
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3664
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1254
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3680
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1264
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3696
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1274
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3712
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1284
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3728
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1294
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3744
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3760
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3776
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3792
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3808
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3824
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3840
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1304
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3856
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1314
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3872
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1324
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3888
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1334
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3904
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1344
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3920
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1354
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3936
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1364
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3952
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1374
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3968
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1384
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1394
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x13e4
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v4, vcc, s0, v5
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x13d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x13c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x13b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x13a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1394
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1384
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1374
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1364
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3952
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1354
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3936
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1344
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3920
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1334
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3904
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1324
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3888
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1314
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3872
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1304
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3856
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3840
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3824
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3808
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3792
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3776
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3760
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1294
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3744
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1284
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3728
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1274
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3712
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1264
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3696
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1254
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3680
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1244
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3664
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1234
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3648
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1224
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3632
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1214
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3616
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1204
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3600
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3584
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3568
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3552
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3536
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3520
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3504
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1194
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3488
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1184
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3472
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1174
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3456
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1164
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3440
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1154
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3424
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1144
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3408
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1134
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3392
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1124
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3376
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1114
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3360
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1104
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3344
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3328
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3312
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3296
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3280
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3264
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3248
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1094
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3232
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1084
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3216
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1074
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3200
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1064
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3184
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1054
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3168
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1044
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3152
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1034
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3136
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1024
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3120
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1014
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3104
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1004
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3088
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xff4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3072
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xfe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3056
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xfd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3040
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xfc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3024
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xfb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3008
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xfa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2992
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2976
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2960
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2944
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2928
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2912
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2896
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2880
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2864
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2848
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2832
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xef4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2816
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xee4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2800
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xed4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2784
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xec4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2768
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xeb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2752
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xea4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2736
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2720
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2704
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2688
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2672
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2656
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2640
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2624
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2608
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2592
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2576
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xdf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2560
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xde4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2544
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xdd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2528
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xdc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2512
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xdb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2496
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xda4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2480
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2464
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2448
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2432
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2416
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2400
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2384
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2368
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2352
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2336
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2320
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xcf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2304
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xce4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2288
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xcd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2272
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xcc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2256
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xcb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2240
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xca4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2224
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2208
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2192
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2176
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2160
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2144
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2128
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2112
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2096
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xbf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xbe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xbd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xbc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xbb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xba4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1952
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1936
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1920
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1904
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1888
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1872
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1856
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1840
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1824
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1808
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xaf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1792
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xae4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1776
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xad4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1760
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xac4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1744
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xab4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1728
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xaa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1712
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1696
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1680
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1664
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1648
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1632
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1616
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1600
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1584
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1568
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1552
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1536
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1520
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1504
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1488
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1472
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1456
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x994
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1440
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x984
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1424
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x974
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1408
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x964
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1392
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x954
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1376
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x944
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1360
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x934
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1344
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x924
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1328
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x914
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1312
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x904
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1296
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1280
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1264
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1248
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1232
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1216
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1200
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x894
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1184
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x884
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1168
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x874
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1152
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x864
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1136
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x854
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1120
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x844
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1104
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x834
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1088
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x824
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1072
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x814
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1056
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x804
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1040
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1024
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1008
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:992
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:976
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:960
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:944
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x794
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:928
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x784
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:912
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x774
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:896
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x764
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:880
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x754
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:864
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x744
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:848
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x734
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:832
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x724
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:816
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x714
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:800
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x704
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:784
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:768
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:752
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:736
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:720
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:704
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:688
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x694
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:672
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x684
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:656
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x674
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:640
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x664
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:624
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x654
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:608
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x644
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:592
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x634
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:576
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x624
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:560
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x614
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:544
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x604
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:528
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:512
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:496
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:480
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:464
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:448
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:432
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x594
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:416
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x584
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:400
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x574
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:384
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x564
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:368
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x554
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:352
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x544
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:336
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x534
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:320
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x524
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:304
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x514
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:288
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x504
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:272
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:256
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:240
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:224
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:208
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:192
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:176
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x494
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:160
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x484
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:144
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x474
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:128
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x464
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:112
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x454
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:96
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x444
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:80
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x434
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:64
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x424
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:48
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x414
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:32
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x404
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:16
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3f4
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s10, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x394
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x384
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x374
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s9, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x364
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x354
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x344
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x334
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x324
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x314
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x304
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s8, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x294
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x284
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x274
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s7, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x264
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x254
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x244
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x234
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x224
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x214
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x204
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x194
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x184
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x174
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s5, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x164
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x154
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x144
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x134
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x124
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x114
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x104
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x80, v4
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 52
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 36
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 20
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_endpgm
+;
+; GFX10-FLATSCR-LABEL: test:
+; GFX10-FLATSCR:       ; %bb.0: ; %entry
+; GFX10-FLATSCR-NEXT:    s_add_u32 s2, s2, s5
+; GFX10-FLATSCR-NEXT:    s_addc_u32 s3, s3, 0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-FLATSCR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-FLATSCR-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
+; GFX10-FLATSCR-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v5, 13, v0
+; GFX10-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v4, s4, s2, v5
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e64 v22, null, s3, 0, s4
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x804
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x80, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v2, vcc_lo, 0x100, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:20 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:36 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:52 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:68 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:84 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:100 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:116 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:132 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:148 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:164 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:180 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:196 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:212 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:228 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:244 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v6, vcc_lo, 0x180, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:260 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:276 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:292 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:308 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:324 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:340 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:356 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:372 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v8, vcc_lo, 0x200, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:388 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:404 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:420 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:436 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:452 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:468 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:484 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:500 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v10, vcc_lo, 0x280, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v11, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:516 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:532 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:548 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:564 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:580 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:596 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:612 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:628 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v12, vcc_lo, 0x300, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:644 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:660 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:676 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:692 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:708 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:724 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:740 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:756 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v14, vcc_lo, 0x380, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v15, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:772 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:788 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:804 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:820 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:836 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:852 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:868 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:884 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v16, vcc_lo, 0x400, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:900 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:916 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:932 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:948 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:964 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:980 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:996 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:1012 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v18, vcc_lo, 0x480, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v19, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x500, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1028 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1044 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1060 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1076 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1092 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1108 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1124 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1140 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1156 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1172 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1188 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1204 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1220 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1236 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1252 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x580, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1268 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1284 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1300 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1316 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1332 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1348 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1364 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1380 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x600, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1396 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1412 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1428 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1444 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1460 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1476 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1492 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1508 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x680, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1524 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1540 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1556 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1572 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1588 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1604 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1620 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1636 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x700, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1652 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1668 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1684 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1700 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1716 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1732 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1748 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1764 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x780, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v0
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1780 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1796 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1812 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1828 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1844 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1860 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1876 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1892 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1908 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1924 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1940 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1956 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1972 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1988 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:2004 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:2020 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v2
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:2036 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x814
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x824
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x834
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x844
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x854
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x864
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x874
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v6
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v7, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x884
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x894
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v8
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v9, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x904
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x914
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x924
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x934
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x944
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x954
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x964
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x974
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v10
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v11, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x984
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x994
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v12
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v13, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa04
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v14
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v15, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa84
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xaa4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xab4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xac4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xad4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xae4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xaf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v16
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v17, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb04
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v18
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v19, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb84
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xba4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xbb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xbc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xbd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xbe4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xbf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3]
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:16
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:32
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:48
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:64
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:80
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:96
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:112
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:128
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:144
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:160
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xca4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:176
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xcb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:192
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xcc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:208
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xcd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:224
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xce4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:240
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xcf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:256
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:272
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:288
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:304
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:320
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:336
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:352
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:368
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:384
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:400
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:416
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xda4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:432
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xdb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:448
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xdc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:464
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xdd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:480
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xde4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:496
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xdf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:512
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:528
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:544
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:560
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:576
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:592
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:608
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:624
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:640
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:656
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:672
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xea4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:688
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xeb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:704
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xec4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:720
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xed4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:736
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xee4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:752
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xef4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:768
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:784
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:800
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:816
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:832
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:848
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:864
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:880
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:896
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:912
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:928
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xfa4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:944
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xfb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:960
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xfc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:976
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xfd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:992
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xfe4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xff4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1004
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1014
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1024
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1034
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1044
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1054
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1064
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1074
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1084
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1094
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1104
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1114
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1124
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1134
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1144
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1154
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1164
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1174
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1184
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1194
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1204
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1214
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1224
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1234
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1244
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1254
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1264
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1274
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1284
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1294
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1304
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1314
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1324
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1334
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1344
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1354
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1364
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1374
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1384
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1394
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x13a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x13b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x13c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x13d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x13e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v4, s2, s0, v5
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e64 v6, null, s1, 0, s2
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x13e4
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x13d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x13c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x13b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x13a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1394
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1384
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1374
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1364
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1904
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1354
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1888
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1344
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1872
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1334
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1856
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1324
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1840
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1314
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1824
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1304
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1808
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1792
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1776
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1760
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1744
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1728
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1712
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1294
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1696
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1284
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1680
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1274
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1664
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1264
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1648
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1254
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1632
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1244
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1616
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1234
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1600
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1224
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1584
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1214
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1568
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1204
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1552
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1536
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1520
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1504
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1488
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1472
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1456
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1194
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1440
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1184
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1424
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1174
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1408
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1164
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1392
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1154
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1376
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1144
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1360
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1134
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1344
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1124
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1328
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1114
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1312
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1104
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1296
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1280
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1264
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1248
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1232
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1216
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1200
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1094
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1184
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1084
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1168
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1074
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1152
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1064
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1136
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1054
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1120
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1044
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1104
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1034
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1088
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1024
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1072
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1014
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1056
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1004
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1040
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xff4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1024
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xfe4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1008
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xfd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:992
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xfc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:976
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xfb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:960
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xfa4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:944
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:928
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:912
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:896
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:880
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:864
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:848
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:832
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:816
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:800
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:784
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xef4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:768
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xee4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:752
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xed4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:736
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xec4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:720
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xeb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:704
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xea4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:688
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:672
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:656
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:640
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:624
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:608
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:592
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:576
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:560
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:544
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:528
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xdf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:512
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xde4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:496
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xdd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:480
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xdc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:464
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xdb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:448
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xda4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:432
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:416
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:400
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:384
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:368
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:352
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:336
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:320
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:304
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:288
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:272
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xcf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:256
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xce4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:240
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xcd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:224
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xcc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:208
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xcb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:192
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xca4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:176
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:160
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:144
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:128
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:112
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:96
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:80
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:64
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:48
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:32
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:16
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xbf4
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x480, v4
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v2, vcc_lo, 0x780, v0
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xbe4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xbd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xbc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xbb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xba4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v2, vcc_lo, 0x400, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb64
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v7, vcc_lo, 0x780, v2
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, 0, v3, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xaf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v7, vcc_lo, 0x380, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xae4
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v9, vcc_lo, 0x780, v7
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, 0, v8, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xad4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xac4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xab4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xaa4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v9, vcc_lo, 0x300, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa64
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v11, vcc_lo, 0x780, v9
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, 0, v10, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v11, vcc_lo, 0x280, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9e4
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v13, vcc_lo, 0x780, v11
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v14, vcc_lo, 0, v12, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x994
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x974
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v13, vcc_lo, 0x200, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v14, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x964
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v15, vcc_lo, 0x780, v13
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v16, vcc_lo, 0, v14, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x954
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x944
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x934
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x924
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x914
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x904
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v15, vcc_lo, 0x180, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v16, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8e4
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v17, vcc_lo, 0x780, v15
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v18, vcc_lo, 0, v16, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x894
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x884
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x874
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v17, vcc_lo, 0x100, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v18, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x864
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v19, vcc_lo, 0x780, v17
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v20, vcc_lo, 0, v18, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x854
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x844
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x834
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x824
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x814
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x804
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:2036 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v19, vcc_lo, 0x80, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v20, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x780, v19
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v20, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:2020 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:2004 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1988 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1972 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1956 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1940 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1924 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1908 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x780, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1892 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1876 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1860 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1844 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1828 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1812 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1796 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1780 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x700, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1764 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1748 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1732 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1716 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1700 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1684 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1668 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1652 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x680, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1636 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1620 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1604 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1588 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1572 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1556 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1540 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1524 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x600, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1508 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1492 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1476 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1460 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1444 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1428 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1412 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1396 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x580, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v4, vcc_lo, 0x500, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1380 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1364 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1348 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1332 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1316 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1300 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1284 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1268 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1252 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1236 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1220 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1204 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1188 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1172 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1156 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1140 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1124 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1108 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1092 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1076 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1060 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1044 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1028 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1012 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:996 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:980 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:964 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:948 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:932 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:916 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:900 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:884 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:868 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:852 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:836 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:820 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:804 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:788 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:772 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:756 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:740 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:724 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:708 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:692 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:676 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:660 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:644 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:628 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:612 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:596 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:580 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:564 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:548 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:532 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:516 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:500 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:484 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:468 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:452 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:436 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:420 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:404 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:388 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:372 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:356 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:340 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:324 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:308 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:292 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:276 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:260 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:244 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:228 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:212 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:196 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:180 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:164 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:148 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:132 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:116 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:100 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:84 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:68 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:52 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:36 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:20 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:4 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_endpgm
 entry:
   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
   %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
@@ -45,79 +10073,1028 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_limited_sgpr
-; GFX6: %bb.1:
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: buffer_store_dword [[SPILL_REG_0:v[0-9]+]]
-; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_0]]
-; GFX6: v_mov_b32_e32 [[OFFSET_REG0:v[0-9]+]], 0x[[OFFSET0:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[SPILL_REG_0]], [[OFFSET_REG0]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6: buffer_load_dword [[SPILL_REG_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec, s
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: v_mov_b32_e32 [[RELOAD_OFFSET_REG0:v[0-9]+]], 0x[[RELOAD_OFFSET0:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[RELOAD_REG_0:v[0-9]+]], off,
-; GFX6: buffer_load_dword [[RELOAD_REG_0]], [[RELOAD_OFFSET_REG0]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6-COUNT-8: v_readlane_b32 s{{[0-9]+}}, [[RELOAD_REG_0]]
-; GFX6: buffer_load_dword [[RELOAD_REG_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec,
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: buffer_store_dword [[SPILL_REG_1:v[0-9]+]]
-; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_1]]
-; GFX6: v_mov_b32_e32 [[OFFSET_REG1:v[0-9]+]], 0x[[OFFSET1:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[SPILL_REG_1]], [[OFFSET_REG1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6: buffer_load_dword [[SPILL_REG_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec, s
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: v_mov_b32_e32 [[RELOAD_OFFSET_REG1:v[0-9]+]], 0x[[RELOAD_OFFSET1:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[RELOAD_REG_1:v[0-9]+]], off,
-; GFX6: buffer_load_dword [[RELOAD_REG_1]], [[RELOAD_OFFSET_REG1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6-COUNT-8: v_readlane_b32 s{{[0-9]+}}, [[RELOAD_REG_1]]
-; GFX6: buffer_load_dword [[RELOAD_REG_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec,
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: buffer_store_dword [[SPILL_REG_2:v[0-9]+]]
-; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_2]]
-; GFX6: v_mov_b32_e32 [[OFFSET_REG2:v[0-9]+]], 0x[[OFFSET2:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[SPILL_REG_2]], [[OFFSET_REG2]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6: buffer_load_dword [[SPILL_REG_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec, s
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: buffer_store_dword [[SPILL_REG_3:v[0-9]+]]
-; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_3]]
-; GFX6: v_mov_b32_e32 [[OFFSET_REG3:v[0-9]+]], 0x[[OFFSET3:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[SPILL_REG_3]], [[OFFSET_REG3]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6: buffer_load_dword [[SPILL_REG_3]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec, s
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: buffer_store_dword [[SPILL_REG_4:v[0-9]+]]
-; GFX6-COUNT-4: v_writelane_b32 [[SPILL_REG_4]]
-; GFX6: v_mov_b32_e32 [[OFFSET_REG4:v[0-9]+]], 0x[[OFFSET4:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[SPILL_REG_4]], [[OFFSET_REG4]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6: buffer_load_dword [[SPILL_REG_4]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec, s
-
-; GFX6: NumSgprs: 48
-; GFX6: ScratchSize: 8608
-
-; FLATSCR:           s_movk_i32 [[SOFF1:s[0-9]+]], 0x
-; GFX9-FLATSCR:      s_waitcnt vmcnt(0)
-; FLATSCR:           scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill
-; FLATSCR:           s_movk_i32 [[SOFF2:s[0-9]+]], 0x
-; FLATSCR:           scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload
 define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64 x i32> addrspace(1)* %in) #0 {
+; GFX6-LABEL: test_limited_sgpr:
+; GFX6:       ; %bb.0: ; %entry
+; GFX6-NEXT:    s_mov_b32 s40, SCRATCH_RSRC_DWORD0
+; GFX6-NEXT:    s_mov_b32 s41, SCRATCH_RSRC_DWORD1
+; GFX6-NEXT:    s_mov_b32 s42, -1
+; GFX6-NEXT:    s_mov_b32 s43, 0xe8f000
+; GFX6-NEXT:    s_add_u32 s40, s40, s3
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX6-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
+; GFX6-NEXT:    v_mbcnt_hi_u32_b32_e32 v5, -1, v0
+; GFX6-NEXT:    v_mov_b32_e32 v6, 0
+; GFX6-NEXT:    s_mov_b32 s38, 0
+; GFX6-NEXT:    s_mov_b32 s39, 0xf000
+; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6-NEXT:    s_mov_b64 s[36:37], s[2:3]
+; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 8, v5
+; GFX6-NEXT:    v_mov_b32_e32 v8, v6
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:240
+; GFX6-NEXT:    s_addc_u32 s41, s41, 0
+; GFX6-NEXT:    s_mov_b32 s2, 0x83400
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:224
+; GFX6-NEXT:    s_mov_b32 s2, 0x83000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:208
+; GFX6-NEXT:    s_mov_b32 s2, 0x82c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:192
+; GFX6-NEXT:    s_mov_b32 s2, 0x82800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:176
+; GFX6-NEXT:    s_mov_b32 s2, 0x82400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:160
+; GFX6-NEXT:    s_mov_b32 s2, 0x82000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:144
+; GFX6-NEXT:    s_mov_b32 s2, 0x81c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:128
+; GFX6-NEXT:    s_mov_b32 s2, 0x81800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:112
+; GFX6-NEXT:    s_mov_b32 s2, 0x81400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:96
+; GFX6-NEXT:    s_mov_b32 s2, 0x81000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:80
+; GFX6-NEXT:    s_mov_b32 s2, 0x80c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:64
+; GFX6-NEXT:    s_mov_b32 s2, 0x80400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64
+; GFX6-NEXT:    buffer_load_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:16
+; GFX6-NEXT:    s_mov_b32 s2, 0x80800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v11, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v12, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_load_dwordx4 v[13:16], v[7:8], s[36:39], 0 addr64 offset:32
+; GFX6-NEXT:    buffer_load_dwordx4 v[17:20], v[7:8], s[36:39], 0 addr64 offset:48
+; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 13, v0
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 16, v4
+; GFX6-NEXT:    v_mov_b32_e32 v7, 1
+; GFX6-NEXT:    buffer_store_dword v7, v4, s[40:43], 0 offen
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[4:11]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v4, s4, 0
+; GFX6-NEXT:    v_writelane_b32 v4, s5, 1
+; GFX6-NEXT:    v_writelane_b32 v4, s6, 2
+; GFX6-NEXT:    v_writelane_b32 v4, s7, 3
+; GFX6-NEXT:    v_writelane_b32 v4, s8, 4
+; GFX6-NEXT:    v_writelane_b32 v4, s9, 5
+; GFX6-NEXT:    v_writelane_b32 v4, s10, 6
+; GFX6-NEXT:    v_writelane_b32 v4, s11, 7
+; GFX6-NEXT:    s_mov_b32 s2, 0x83800
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[8:15]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[16:23]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[24:31]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[4:7]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[2:3]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[36:37]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s33
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    s_and_saveexec_b64 s[34:35], vcc
+; GFX6-NEXT:    s_cbranch_execz .LBB1_2
+; GFX6-NEXT:  ; %bb.1: ; %bb0
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v9, s8, 0
+; GFX6-NEXT:    v_writelane_b32 v9, s9, 1
+; GFX6-NEXT:    v_writelane_b32 v9, s10, 2
+; GFX6-NEXT:    v_writelane_b32 v9, s11, 3
+; GFX6-NEXT:    v_writelane_b32 v9, s12, 4
+; GFX6-NEXT:    v_writelane_b32 v9, s13, 5
+; GFX6-NEXT:    v_writelane_b32 v9, s14, 6
+; GFX6-NEXT:    v_writelane_b32 v9, s15, 7
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2100
+; GFX6-NEXT:    buffer_store_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x20e0
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s8, v8, 0
+; GFX6-NEXT:    v_readlane_b32 s9, v8, 1
+; GFX6-NEXT:    v_readlane_b32 s10, v8, 2
+; GFX6-NEXT:    v_readlane_b32 s11, v8, 3
+; GFX6-NEXT:    v_readlane_b32 s12, v8, 4
+; GFX6-NEXT:    v_readlane_b32 s13, v8, 5
+; GFX6-NEXT:    v_readlane_b32 s14, v8, 6
+; GFX6-NEXT:    v_readlane_b32 s15, v8, 7
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v7, s16, 0
+; GFX6-NEXT:    v_writelane_b32 v7, s17, 1
+; GFX6-NEXT:    v_writelane_b32 v7, s18, 2
+; GFX6-NEXT:    v_writelane_b32 v7, s19, 3
+; GFX6-NEXT:    v_writelane_b32 v7, s20, 4
+; GFX6-NEXT:    v_writelane_b32 v7, s21, 5
+; GFX6-NEXT:    v_writelane_b32 v7, s22, 6
+; GFX6-NEXT:    v_writelane_b32 v7, s23, 7
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2120
+; GFX6-NEXT:    buffer_store_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2100
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s16, v9, 0
+; GFX6-NEXT:    v_readlane_b32 s17, v9, 1
+; GFX6-NEXT:    v_readlane_b32 s18, v9, 2
+; GFX6-NEXT:    v_readlane_b32 s19, v9, 3
+; GFX6-NEXT:    v_readlane_b32 s20, v9, 4
+; GFX6-NEXT:    v_readlane_b32 s21, v9, 5
+; GFX6-NEXT:    v_readlane_b32 s22, v9, 6
+; GFX6-NEXT:    v_readlane_b32 s23, v9, 7
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v8, s24, 0
+; GFX6-NEXT:    v_writelane_b32 v8, s25, 1
+; GFX6-NEXT:    v_writelane_b32 v8, s26, 2
+; GFX6-NEXT:    v_writelane_b32 v8, s27, 3
+; GFX6-NEXT:    v_writelane_b32 v8, s28, 4
+; GFX6-NEXT:    v_writelane_b32 v8, s29, 5
+; GFX6-NEXT:    v_writelane_b32 v8, s30, 6
+; GFX6-NEXT:    v_writelane_b32 v8, s31, 7
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2140
+; GFX6-NEXT:    buffer_store_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2120
+; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s24, v7, 0
+; GFX6-NEXT:    v_readlane_b32 s25, v7, 1
+; GFX6-NEXT:    v_readlane_b32 s26, v7, 2
+; GFX6-NEXT:    v_readlane_b32 s27, v7, 3
+; GFX6-NEXT:    v_readlane_b32 s28, v7, 4
+; GFX6-NEXT:    v_readlane_b32 s29, v7, 5
+; GFX6-NEXT:    v_readlane_b32 s30, v7, 6
+; GFX6-NEXT:    v_readlane_b32 s31, v7, 7
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v10, s0, 0
+; GFX6-NEXT:    v_writelane_b32 v10, s1, 1
+; GFX6-NEXT:    v_writelane_b32 v10, s2, 2
+; GFX6-NEXT:    v_writelane_b32 v10, s3, 3
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2160
+; GFX6-NEXT:    buffer_store_dword v10, v4, s[40:43], 0 offen ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v10, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v8, s4, 0
+; GFX6-NEXT:    v_writelane_b32 v8, s5, 1
+; GFX6-NEXT:    v_writelane_b32 v8, s6, 2
+; GFX6-NEXT:    v_writelane_b32 v8, s7, 3
+; GFX6-NEXT:    s_mov_b32 s0, 0x85c00
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], s0 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[0:1], exec
+; GFX6-NEXT:    s_mov_b64 exec, 3
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v9, s2, 0
+; GFX6-NEXT:    v_writelane_b32 v9, s3, 1
+; GFX6-NEXT:    s_mov_b32 s4, 0x86600
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], s4 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[0:1]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2140
+; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s0, v7, 0
+; GFX6-NEXT:    v_readlane_b32 s1, v7, 1
+; GFX6-NEXT:    v_readlane_b32 s2, v7, 2
+; GFX6-NEXT:    v_readlane_b32 s3, v7, 3
+; GFX6-NEXT:    v_readlane_b32 s4, v7, 4
+; GFX6-NEXT:    v_readlane_b32 s5, v7, 5
+; GFX6-NEXT:    v_readlane_b32 s6, v7, 6
+; GFX6-NEXT:    v_readlane_b32 s7, v7, 7
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v8, s36, 0
+; GFX6-NEXT:    v_writelane_b32 v8, s37, 1
+; GFX6-NEXT:    v_writelane_b32 v8, s38, 2
+; GFX6-NEXT:    v_writelane_b32 v8, s39, 3
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2180
+; GFX6-NEXT:    buffer_store_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[38:39], exec
+; GFX6-NEXT:    s_mov_b64 exec, 3
+; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v10, s36, 0
+; GFX6-NEXT:    v_writelane_b32 v10, s37, 1
+; GFX6-NEXT:    s_mov_b32 s44, 0x86400
+; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], s44 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v10, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[38:39]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2170
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s36, v9, 0
+; GFX6-NEXT:    v_readlane_b32 s37, v9, 1
+; GFX6-NEXT:    v_readlane_b32 s38, v9, 2
+; GFX6-NEXT:    v_readlane_b32 s39, v9, 3
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2190
+; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2190
+; GFX6-NEXT:    buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s44, v7, 0
+; GFX6-NEXT:    v_readlane_b32 s45, v7, 1
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    s_mov_b64 vcc, s[34:35]
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2198
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2198
+; GFX6-NEXT:    buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s34, v8, 0
+; GFX6-NEXT:    v_readlane_b32 s35, v8, 1
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; use s[8:15],s[16:23],s[24:31],s[0:7],s[36:39],s[34:35],s[44:45]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    s_mov_b64 s[34:35], vcc
+; GFX6-NEXT:    s_mov_b64 s[8:9], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    s_mov_b32 s0, 0x86000
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s36, v4, 0
+; GFX6-NEXT:    v_readlane_b32 s37, v4, 1
+; GFX6-NEXT:    v_readlane_b32 s38, v4, 2
+; GFX6-NEXT:    v_readlane_b32 s39, v4, 3
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX6-NEXT:    s_mov_b64 s[4:5], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    s_mov_b32 s6, 0x85800
+; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s6 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s0, v7, 0
+; GFX6-NEXT:    v_readlane_b32 s1, v7, 1
+; GFX6-NEXT:    v_readlane_b32 s2, v7, 2
+; GFX6-NEXT:    v_readlane_b32 s3, v7, 3
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX6-NEXT:    s_mov_b32 s2, 0x83800
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_mov_b32 s2, 0x84000
+; GFX6-NEXT:    buffer_store_dword v13, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v14, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v15, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v16, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_mov_b32 s2, 0x84800
+; GFX6-NEXT:    buffer_store_dword v17, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v18, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v19, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v20, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    s_mov_b32 s2, 0x84800
+; GFX6-NEXT:    buffer_load_dword v17, off, s[40:43], s2 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v18, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v19, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v20, off, s[40:43], s2 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s2, 0x84000
+; GFX6-NEXT:    buffer_load_dword v13, off, s[40:43], s2 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v14, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v15, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v16, off, s[40:43], s2 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s2, 0x83800
+; GFX6-NEXT:    buffer_load_dword v0, off, s[40:43], s2 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:  .LBB1_2: ; %ret
+; GFX6-NEXT:    s_or_b64 exec, exec, s[34:35]
+; GFX6-NEXT:    s_mov_b32 s4, 0x83400
+; GFX6-NEXT:    v_lshl_b64 v[4:5], v[5:6], 8
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GFX6-NEXT:    s_mov_b32 s4, 0x83000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:240
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x82c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:224
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x82800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:208
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x82400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:192
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x82000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:176
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x81c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:160
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x81800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:144
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x81400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:128
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x81000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:112
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x80c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:96
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x80400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:80
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x80800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:64
+; GFX6-NEXT:    buffer_store_dwordx4 v[17:20], v[4:5], s[0:3], 0 addr64 offset:48
+; GFX6-NEXT:    buffer_store_dwordx4 v[13:16], v[4:5], s[0:3], 0 addr64 offset:32
+; GFX6-NEXT:    s_waitcnt expcnt(2)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:16
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64
+; GFX6-NEXT:    s_endpgm
+;
+; GFX9-FLATSCR-LABEL: test_limited_sgpr:
+; GFX9-FLATSCR:       ; %bb.0: ; %entry
+; GFX9-FLATSCR-NEXT:    s_load_dwordx4 s[36:39], s[0:1], 0x24
+; GFX9-FLATSCR-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
+; GFX9-FLATSCR-NEXT:    v_mbcnt_hi_u32_b32 v5, -1, v0
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 8, v5
+; GFX9-FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
+; GFX9-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:240
+; GFX9-FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20b0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, 1
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v0, s[38:39] offset:224
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:208
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20a0
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v0, s[38:39] offset:192
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:176
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2090
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v0, s[38:39] offset:160
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:144
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2080
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:128
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20c0
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:112
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2060
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:96
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2050
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:80
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2040
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:64
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2030
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:48
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2020
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:32
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2070
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:16
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2010
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v0, s[38:39]
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, 16
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_lshl_add_u32 v4, v0, 13, v4
+; GFX9-FLATSCR-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-FLATSCR-NEXT:    scratch_store_dword v4, v7, off
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[0:7]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[8:15]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[16:23]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[24:31]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[40:43]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[38:39]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[44:45]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s33
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    s_and_saveexec_b64 s[34:35], vcc
+; GFX9-FLATSCR-NEXT:    s_cbranch_execz .LBB1_2
+; GFX9-FLATSCR-NEXT:  ; %bb.1: ; %bb0
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; use s[0:7],s[8:15],s[16:23],s[24:31],s[40:43],s[38:39],s[44:45]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20d0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20e0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20f0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2100
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2100
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[8:11], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20f0
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[20:23], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20e0
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[16:19], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20d0
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:  .LBB1_2: ; %ret
+; GFX9-FLATSCR-NEXT:    s_or_b64 exec, exec, s[34:35]
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20b0
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[12:15], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b64 v[4:5], 8, v[5:6]
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, s37
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v4, vcc, s36, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v5, vcc, v6, v5, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20a0
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[12:15], off offset:240
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[8:11], off offset:224
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2090
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:208
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[20:23], off offset:192
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[20:23], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2080
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[20:23], off offset:176
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[16:19], off offset:160
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[16:19], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20c0
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2060
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[12:15], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2050
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(2)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[16:19], off offset:144
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(2)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:128
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2040
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(3)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[12:15], off offset:112
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:96
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2030
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:80
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2020
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:64
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2070
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:48
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2010
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:32
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:16
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX9-FLATSCR-NEXT:    s_endpgm
+;
+; GFX10-FLATSCR-LABEL: test_limited_sgpr:
+; GFX10-FLATSCR:       ; %bb.0: ; %entry
+; GFX10-FLATSCR-NEXT:    s_add_u32 s2, s2, s5
+; GFX10-FLATSCR-NEXT:    s_addc_u32 s3, s3, 0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-FLATSCR-NEXT:    s_load_dwordx4 s[36:39], s[0:1], 0x24
+; GFX10-FLATSCR-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v7, 1
+; GFX10-FLATSCR-NEXT:    s_mov_b32 s33, exec_lo
+; GFX10-FLATSCR-NEXT:    v_mbcnt_hi_u32_b32 v5, -1, v0
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 8, v5
+; GFX10-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-NEXT:    s_clause 0xf
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[64:67], v0, s[38:39] offset:240
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[60:63], v0, s[38:39] offset:224
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[56:59], v0, s[38:39] offset:208
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[52:55], v0, s[38:39] offset:192
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[48:51], v0, s[38:39] offset:176
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[44:47], v0, s[38:39] offset:160
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[40:43], v0, s[38:39] offset:144
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[36:39], v0, s[38:39] offset:128
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[32:35], v0, s[38:39] offset:112
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[28:31], v0, s[38:39] offset:96
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[24:27], v0, s[38:39] offset:80
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v0, s[38:39] offset:64
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v0, s[38:39] offset:48
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v0, s[38:39] offset:32
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v0, s[38:39] offset:16
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v0, s[38:39]
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    v_lshl_add_u32 v4, v0, 13, 16
+; GFX10-FLATSCR-NEXT:    scratch_store_dword v4, v7, off
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[0:7]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[8:15]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[16:23]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[24:31]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[40:43]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[34:35]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[38:39]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s44
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    v_cmpx_eq_u32_e32 0, v0
+; GFX10-FLATSCR-NEXT:    s_cbranch_execz .LBB1_2
+; GFX10-FLATSCR-NEXT:  ; %bb.1: ; %bb0
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; use s[0:7],s[8:15],s[16:23],s[24:31],s[40:43],s[34:35],s[38:39]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x2010
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v88, v59
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v92, v63
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v87, v58
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v86, v57
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v85, v56
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v91, v62
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v90, v61
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v89, v60
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v60, v35
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[64:67], s0 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v68, v39
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v59, v34
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v58, v33
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v57, v32
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v67, v38
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v66, v37
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v65, v36
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v36, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v72, v43
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v76, v47
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v80, v51
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v84, v55
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, v8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v71, v42
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v70, v41
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v69, v40
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v40, v15
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v75, v46
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v74, v45
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v73, v44
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v44, v19
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v79, v50
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v78, v49
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v77, v48
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v48, v23
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v83, v54
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v82, v53
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v81, v52
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v52, v27
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v56, v31
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v35, v10
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v37, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v41, v16
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v45, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v49, v24
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v53, v28
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v39, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v38, v13
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v43, v18
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v42, v17
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v47, v22
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v46, v21
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v51, v26
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v50, v25
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v55, v30
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v54, v29
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v8, v33
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v28, v53
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v24, v49
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v20, v45
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v16, v41
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, v37
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v9, v34
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v10, v35
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v11, v36
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v32, v57
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v29, v54
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v30, v55
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v31, v56
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v25, v50
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v51
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v27, v52
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v21, v46
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v22, v47
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, v48
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v17, v42
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, v43
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, v44
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v13, v38
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v14, v39
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v15, v40
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, v58
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, v59
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v35, v60
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x2010
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v36, v65
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v37, v66
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v38, v67
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v39, v68
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[64:67], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v60, v89
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v56, v85
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v52, v81
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v48, v77
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v44, v73
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v40, v69
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v61, v90
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v62, v91
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v63, v92
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v57, v86
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v58, v87
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v59, v88
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v53, v82
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v54, v83
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v55, v84
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v49, v78
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v50, v79
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v51, v80
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v45, v74
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v46, v75
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v47, v76
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v41, v70
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v42, v71
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v43, v72
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:  .LBB1_2: ; %ret
+; GFX10-FLATSCR-NEXT:    s_or_b32 exec_lo, exec_lo, s33
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b64 v[4:5], 8, v[5:6]
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v4, vcc_lo, s36, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, s37, v5, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[64:67], off offset:240
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[60:63], off offset:224
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[56:59], off offset:208
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[52:55], off offset:192
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[48:51], off offset:176
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[44:47], off offset:160
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[40:43], off offset:144
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[36:39], off offset:128
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[32:35], off offset:112
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[28:31], off offset:96
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[24:27], off offset:80
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[20:23], off offset:64
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[16:19], off offset:48
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[12:15], off offset:32
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[8:11], off offset:16
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX10-FLATSCR-NEXT:    s_endpgm
 entry:
   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
   %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
@@ -173,3 +11150,6 @@ declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
 
 attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
 attributes #1 = { nounwind readnone }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
+; FLATSCR: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
index 69442032f37e..3914c3b2fe07 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
@@ -48,23 +48,23 @@ body:             |
     ; GFX9-NEXT: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
     ; GFX9-NEXT: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
     ; GFX9-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX9-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc
+    ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GFX9-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX9-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX9-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc
+    ; GFX9-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1
     ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -84,23 +84,23 @@ body:             |
     ; GFX10-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr9, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GFX10-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GFX10-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX10-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX10-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc
+    ; GFX10-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX10-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GFX10-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX10-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX10-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc
+    ; GFX10-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1
     ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -114,23 +114,23 @@ body:             |
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: $sgpr33 = S_MOV_B32 0
     ; GFX11-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX11-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX11-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc
+    ; GFX11-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX11-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GFX11-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX11-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX11-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc
+    ; GFX11-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX11-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1
     ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)

diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
index 2dbfd58a3db6..2c4d9f009447 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
@@ -412,6 +412,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -432,6 +433,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -451,6 +453,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -470,6 +473,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -483,6 +487,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -517,6 +522,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -537,6 +543,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -556,6 +563,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -576,6 +584,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -589,6 +598,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -624,6 +634,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -644,6 +655,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -663,6 +675,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -684,6 +697,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -697,6 +711,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -730,6 +745,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -750,6 +766,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -769,6 +786,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -788,6 +806,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -801,6 +820,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -835,6 +855,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -855,6 +876,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -874,6 +896,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -894,6 +917,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -907,6 +931,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -942,6 +967,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -962,6 +988,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -981,6 +1008,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -1002,6 +1030,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -1015,6 +1044,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -1144,6 +1174,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -1164,6 +1195,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -1184,6 +1216,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -1203,6 +1236,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -1217,6 +1251,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:


        


More information about the llvm-commits mailing list