[llvm] 32bd757 - PEI should be able to use backward walk in replaceFrameIndicesBackward.

Alexander Timofeev via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 18 06:57:53 PST 2022


Author: Alexander Timofeev
Date: 2022-11-18T15:57:34+01:00
New Revision: 32bd75716c27d1a094c2436529b596ce1547f6eb

URL: https://github.com/llvm/llvm-project/commit/32bd75716c27d1a094c2436529b596ce1547f6eb
DIFF: https://github.com/llvm/llvm-project/commit/32bd75716c27d1a094c2436529b596ce1547f6eb.diff

LOG: PEI should be able to use backward walk in replaceFrameIndicesBackward.

The backward register scavenger has correct register
liveness information. PEI should leverage the backward register scavenger.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D137574

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/TargetRegisterInfo.h
    llvm/lib/CodeGen/PrologEpilogInserter.cpp
    llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
    llvm/lib/Target/AArch64/AArch64RegisterInfo.h
    llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
    llvm/lib/Target/AMDGPU/R600RegisterInfo.h
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/lib/Target/AMDGPU/SIRegisterInfo.h
    llvm/lib/Target/ARC/ARCRegisterInfo.h
    llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
    llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
    llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
    llvm/lib/Target/ARM/ThumbRegisterInfo.h
    llvm/lib/Target/AVR/AVRRegisterInfo.cpp
    llvm/lib/Target/AVR/AVRRegisterInfo.h
    llvm/lib/Target/BPF/BPFRegisterInfo.cpp
    llvm/lib/Target/BPF/BPFRegisterInfo.h
    llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
    llvm/lib/Target/CSKY/CSKYRegisterInfo.h
    llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
    llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
    llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
    llvm/lib/Target/Lanai/LanaiRegisterInfo.h
    llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
    llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
    llvm/lib/Target/M68k/M68kRegisterInfo.cpp
    llvm/lib/Target/M68k/M68kRegisterInfo.h
    llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
    llvm/lib/Target/MSP430/MSP430RegisterInfo.h
    llvm/lib/Target/Mips/MipsRegisterInfo.cpp
    llvm/lib/Target/Mips/MipsRegisterInfo.h
    llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
    llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
    llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
    llvm/lib/Target/PowerPC/PPCRegisterInfo.h
    llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
    llvm/lib/Target/RISCV/RISCVRegisterInfo.h
    llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h
    llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
    llvm/lib/Target/Sparc/SparcRegisterInfo.h
    llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
    llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
    llvm/lib/Target/VE/VERegisterInfo.cpp
    llvm/lib/Target/VE/VERegisterInfo.h
    llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
    llvm/lib/Target/X86/X86RegisterInfo.cpp
    llvm/lib/Target/X86/X86RegisterInfo.h
    llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
    llvm/lib/Target/XCore/XCoreRegisterInfo.h
    llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
    llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
    llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
    llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
    llvm/test/CodeGen/AMDGPU/flat-scratch.ll
    llvm/test/CodeGen/AMDGPU/frame-index.mir
    llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
    llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
    llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
    llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
    llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
    llvm/test/CodeGen/AMDGPU/scratch-simple.ll
    llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
    llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
    llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
    llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
    llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
    llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
    llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
    llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
    llvm/unittests/CodeGen/MFCommon.inc

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index d55f88dd50e57..4ad48f79ee427 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -1028,6 +1028,12 @@ class TargetRegisterInfo : public MCRegisterInfo {
     return false;
   }
 
+  /// Process frame indices in reverse block order. This changes the behavior of
+  /// the RegScavenger passed to eliminateFrameIndex. If this is true targets
+  /// should scavengeRegisterBackwards in eliminateFrameIndex. New targets
+  /// should prefer reverse scavenging behavior.
+  virtual bool supportsBackwardScavenger() const { return false; }
+
   /// This method must be overriden to eliminate abstract frame indices from
   /// instructions which may use them. The instruction referenced by the
   /// iterator contains an MO_FrameIndex operand which must be eliminated by
@@ -1035,7 +1041,9 @@ class TargetRegisterInfo : public MCRegisterInfo {
   /// as long as it keeps the iterator pointing at the finished product.
   /// SPAdj is the SP adjustment due to call frame setup instruction.
   /// FIOperandNum is the FI operand number.
-  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+  /// Returns true if the current instruction was removed and the iterator
+  /// is not longer valid
+  virtual bool eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                    int SPAdj, unsigned FIOperandNum,
                                    RegScavenger *RS = nullptr) const = 0;
 

diff  --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index be0dd7fe4a528..dbfd8b936da52 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -128,8 +128,16 @@ class PEI : public MachineFunctionPass {
   void replaceFrameIndices(MachineFunction &MF);
   void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
                            int &SPAdj);
+  // Frame indices in debug values are encoded in a target independent
+  // way with simply the frame index and offset rather than any
+  // target-specific addressing mode.
   bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
                                    unsigned OpIdx, int SPAdj = 0);
+  // Does same as replaceFrameIndices but using the backward MIR walk and
+  // backward register scavenger walk. Does not yet support call sequence
+  // processing.
+  void replaceFrameIndicesBackward(MachineBasicBlock *BB, MachineFunction &MF,
+                                   int &SPAdj);
 
   void insertPrologEpilogCode(MachineFunction &MF);
   void insertZeroCallUsedRegs(MachineFunction &MF);
@@ -1438,6 +1446,72 @@ bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
   return false;
 }
 
+void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB,
+                                      MachineFunction &MF, int &SPAdj) {
+  assert(MF.getSubtarget().getRegisterInfo() &&
+         "getRegisterInfo() must be implemented!");
+
+  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+  RS->enterBasicBlockEnd(*BB);
+
+  for (MachineInstr &MI : make_early_inc_range(reverse(*BB))) {
+
+    // Register scavenger backward step
+    MachineBasicBlock::iterator Step(MI);
+    for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
+      if (!MI.getOperand(i).isFI())
+        continue;
+
+      if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
+        continue;
+
+      // If this instruction has a FrameIndex operand, we need to
+      // use that target machine register info object to eliminate
+      // it.
+
+      // TRI.eliminateFrameIndex may lower the frame index to a sequence of
+      // instructions. It also can remove/change instructions passed by the
+      // iterator and invalidate the iterator. We have to take care of this. For
+      // that we support two iterators: *Step* - points to the position up to
+      // which the scavenger should scan by the next iteration to have liveness
+      // information up to date. *Curr* - keeps track of the correct RS->MBBI -
+      // the scan start point. It points to the currently processed instruction
+      // right before the frame lowering.
+      //
+      // ITERATORS WORK AS FOLLOWS:
+      // *Step* is shifted one step back right before the frame lowering and
+      // one step forward right after it. No matter how many instructions were
+      // inserted, *Step* will be right after the position which is going to be
+      // processed in the next iteration, thus, in the correct position for the
+      // scavenger to go up to.
+      // *Curr* is shifted one step forward right before calling
+      // TRI.eliminateFrameIndex and one step backward after. Thus, we make sure
+      // it points right to the position that is the correct starting point for
+      // the scavenger to scan.
+      MachineBasicBlock::iterator Curr = ++RS->getCurrentPosition();
+
+      // Shift back
+      --Step;
+
+      bool Removed = TRI.eliminateFrameIndex(MI, SPAdj, i, RS);
+      // Restore to unify logic with a shift back that happens in the end of
+      // the outer loop.
+      ++Step;
+      RS->skipTo(--Curr);
+      if (Removed)
+        break;
+    }
+
+    // Shift it to make RS collect reg info up to the current instruction.
+    if (Step != BB->begin())
+      Step--;
+
+    // Update register states.
+    RS->backward(Step);
+  }
+}
+
 void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
                               int &SPAdj) {
   assert(MF.getSubtarget().getRegisterInfo() &&
@@ -1446,6 +1520,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
 
+  if (RS && TRI.supportsBackwardScavenger())
+    return replaceFrameIndicesBackward(BB, MF, SPAdj);
+
   if (RS && FrameIndexEliminationScavenging)
     RS->enterBasicBlock(*BB);
 
@@ -1466,9 +1543,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
       if (!MI.getOperand(i).isFI())
         continue;
 
-      // Frame indices in debug values are encoded in a target independent
-      // way with simply the frame index and offset rather than any
-      // target-specific addressing mode.
       if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
         continue;
 

diff  --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index d0fb7ec2beb22..f6d53d3e33abb 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -817,7 +817,7 @@ void AArch64RegisterInfo::getOffsetOpcodes(
   }
 }
 
-void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                               int SPAdj, unsigned FIOperandNum,
                                               RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
@@ -845,7 +845,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
     MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
-    return;
+    return false;
   }
 
   if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
@@ -854,7 +854,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     assert(!Offset.getScalable() &&
            "Frame offsets with a scalable component are not supported");
     FI.ChangeToImmediate(Offset.getFixed());
-    return;
+    return false;
   }
 
   StackOffset Offset;
@@ -884,7 +884,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
           .addImm(0);
       MI.getOperand(FIOperandNum)
           .ChangeToRegister(ScratchReg, false, false, true);
-      return;
+      return false;
     }
     FrameReg = AArch64::SP;
     Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
@@ -896,7 +896,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   // Modify MI as necessary to handle as much of 'Offset' as possible
   if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
-    return;
+    return true;
 
   assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) &&
          "Emergency spill slot is out of reach");
@@ -907,6 +907,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   Register ScratchReg =
       createScratchRegisterForInstruction(MI, FIOperandNum, TII);
   emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
+  return false;
 }
 
 unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,

diff  --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 65636bf0173cf..4ffd4142f74be 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -116,7 +116,7 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
                                         int64_t Offset) const override;
   void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                          int64_t Offset) const override;
-  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
   bool cannotEliminateFrame(const MachineFunction &MF) const;

diff  --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
index 941f54cd7fe5e..e782f4d65925b 100644
--- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -103,7 +103,7 @@ bool R600RegisterInfo::isPhysRegLiveAcrossClauses(Register Reg) const {
   }
 }
 
-void R600RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+bool R600RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                            int SPAdj,
                                            unsigned FIOperandNum,
                                            RegScavenger *RS) const {

diff  --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.h b/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
index 1308e9fff1fe3..2372c5361611f 100644
--- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -47,7 +47,7 @@ struct R600RegisterInfo final : public R600GenRegisterInfo {
   // another.
   bool isPhysRegLiveAcrossClauses(Register Reg) const;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 3d96d056bbb78..354e40d4f5bc3 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1364,7 +1364,7 @@ void SIRegisterInfo::buildSpillLoadStore(
     // TODO: Clobbering SCC is not necessary for scratch instructions in the
     // entry.
     if (RS) {
-      SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
+      SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
 
       // Piggy back on the liveness scan we just did see if SCC is dead.
       CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
@@ -1385,7 +1385,7 @@ void SIRegisterInfo::buildSpillLoadStore(
       UseVGPROffset = true;
 
       if (RS) {
-        TmpOffsetVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+        TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
       } else {
         assert(LiveRegs);
         for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
@@ -1961,7 +1961,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
   }
 }
 
-void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
   MachineFunction *MF = MI->getParent()->getParent();
@@ -1992,8 +1992,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_S96_SAVE:
     case AMDGPU::SI_SPILL_S64_SAVE:
     case AMDGPU::SI_SPILL_S32_SAVE: {
-      spillSGPR(MI, Index, RS);
-      break;
+      return spillSGPR(MI, Index, RS);
     }
 
     // SGPR register restore
@@ -2007,8 +2006,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_S96_RESTORE:
     case AMDGPU::SI_SPILL_S64_RESTORE:
     case AMDGPU::SI_SPILL_S32_RESTORE: {
-      restoreSGPR(MI, Index, RS);
-      break;
+      return restoreSGPR(MI, Index, RS);
     }
 
     // VGPR register spill
@@ -2056,7 +2054,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           *MI->memoperands_begin(), RS);
       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
       MI->eraseFromParent();
-      break;
+      return true;
     }
     case AMDGPU::SI_SPILL_V32_RESTORE:
     case AMDGPU::SI_SPILL_V64_RESTORE:
@@ -2101,7 +2099,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
           *MI->memoperands_begin(), RS);
       MI->eraseFromParent();
-      break;
+      return true;
     }
 
     default: {
@@ -2120,7 +2118,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
             FIOp.ChangeToRegister(FrameReg, false);
 
           if (!Offset)
-            return;
+            return false;
 
           MachineOperand *OffsetOp =
             TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
@@ -2129,7 +2127,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                      SIInstrFlags::FlatScratch)) {
             OffsetOp->setImm(NewOffset);
             if (FrameReg)
-              return;
+              return false;
             Offset = 0;
           }
 
@@ -2167,7 +2165,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                 MI->tieOperands(NewVDst, NewVDstIn);
               }
               MI->setDesc(TII->get(NewOpc));
-              return;
+              return false;
             }
           }
         }
@@ -2175,7 +2173,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         if (!FrameReg) {
           FIOp.ChangeToImmediate(Offset);
           if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
-            return;
+            return false;
         }
 
         // We need to use register here. Check if we can use an SGPR or need
@@ -2185,7 +2183,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 
         if (!Offset && FrameReg && UseSGPR) {
           FIOp.setReg(FrameReg);
-          return;
+          return false;
         }
 
         const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
@@ -2203,7 +2201,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           else
             MIB.addImm(Offset);
 
-          return;
+          return false;
         }
 
         Register TmpSReg =
@@ -2239,7 +2237,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
               .addImm(-Offset);
         }
 
-        return;
+        return false;
       }
 
       bool IsMUBUF = TII->isMUBUF(*MI);
@@ -2248,7 +2246,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         // Convert to a swizzled stack address by scaling by the wave size.
         // In an entry function/kernel the offset is already swizzled.
         bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
-        bool LiveSCC = RS->isRegUsed(AMDGPU::SCC);
+        bool LiveSCC =
+            RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
         const TargetRegisterClass *RC = IsSALU && !LiveSCC
                                             ? &AMDGPU::SReg_32RegClass
                                             : &AMDGPU::VGPR_32RegClass;
@@ -2352,11 +2351,12 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         }
 
         // Don't introduce an extra copy if we're just materializing in a mov.
-        if (IsCopy)
+        if (IsCopy) {
           MI->eraseFromParent();
-        else
-          FIOp.ChangeToRegister(ResultReg, false, false, true);
-        return;
+          return true;
+        }
+        FIOp.ChangeToRegister(ResultReg, false, false, true);
+        return false;
       }
 
       if (IsMUBUF) {
@@ -2379,7 +2379,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
             buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
           MI->eraseFromParent();
-          return;
+          return true;
         }
       }
 
@@ -2395,6 +2395,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       }
     }
   }
+  return false;
 }
 
 StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 0e260c8016fcf..17ce8d21980fd 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -149,7 +149,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
                           MachineBasicBlock &RestoreMBB, Register SGPR,
                           RegScavenger *RS) const;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+  bool supportsBackwardScavenger() const override {
+    return true;
+  }
+
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS) const override;
 

diff  --git a/llvm/lib/Target/ARC/ARCRegisterInfo.h b/llvm/lib/Target/ARC/ARCRegisterInfo.h
index b1ae6b69398fd..ea82289022eb5 100644
--- a/llvm/lib/Target/ARC/ARCRegisterInfo.h
+++ b/llvm/lib/Target/ARC/ARCRegisterInfo.h
@@ -39,7 +39,7 @@ struct ARCRegisterInfo : public ARCGenRegisterInfo {
 
   bool useFPForScavengingIndex(const MachineFunction &MF) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 1d0e743b94dbf..cf156f4c92ea7 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -786,7 +786,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
   return false;
 }
 
-void
+bool
 ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          int SPAdj, unsigned FIOperandNum,
                                          RegScavenger *RS) const {
@@ -830,7 +830,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII, this);
   }
   if (Done)
-    return;
+    return false;
 
   // If we get here, the immediate doesn't fit into the instruction.  We folded
   // as much as possible above, handle the rest, providing a register that is
@@ -872,6 +872,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     // Update the original instruction to use the scratch register.
     MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true);
   }
+  return false;
 }
 
 bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI,

diff  --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 73ed300ccff46..d1d0254a3de9b 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -224,7 +224,7 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
 
   bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 2a3fa3b31512a..ef24cc9d5b860 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -458,7 +458,7 @@ void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
   (void)Done;
 }
 
-void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                             int SPAdj, unsigned FIOperandNum,
                                             RegScavenger *RS) const {
   MachineInstr &MI = *II;
@@ -498,14 +498,14 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   if (MI.isDebugValue()) {
     MI.getOperand(FIOperandNum).  ChangeToRegister(FrameReg, false /*isDef*/);
     MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
-    return;
+    return false;
   }
 
   // Modify MI as necessary to handle as much of 'Offset' as possible
   assert(MF.getInfo<ARMFunctionInfo>()->isThumbFunction() &&
          "This eliminateFrameIndex only supports Thumb1!");
   if (rewriteFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
-    return;
+    return true;
 
   // If we get here, the immediate doesn't fit into the instruction.  We folded
   // as much as possible above, handle the rest, providing a register that is
@@ -596,6 +596,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // Add predicate back if it's needed.
   if (MI.isPredicable())
     MIB.add(predOps(ARMCC::AL));
+  return false;
 }
 
 bool

diff  --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.h b/llvm/lib/Target/ARM/ThumbRegisterInfo.h
index e05a24dbaca52..ccfe211b808a5 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.h
@@ -51,7 +51,7 @@ struct ThumbRegisterInfo : public ARMBaseRegisterInfo {
                          const ARMBaseInstrInfo &TII) const;
   void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                          int64_t Offset) const override;
-  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
   bool useFPForScavengingIndex(const MachineFunction &MF) const override;

diff  --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index baf336720a91b..3f8bb7e67db80 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -140,7 +140,7 @@ static void foldFrameOffset(MachineBasicBlock::iterator &II, int &Offset,
   MI.eraseFromParent();
 }
 
-void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                           int SPAdj, unsigned FIOperandNum,
                                           RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected SPAdj value");
@@ -214,7 +214,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                             .addImm(Offset);
     New->getOperand(3).setIsDead();
 
-    return;
+    return false;
   }
 
   // If the offset is too big we have to adjust and restore the frame pointer
@@ -261,6 +261,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MI.getOperand(FIOperandNum).ChangeToRegister(AVR::R29R28, false);
   assert(isUInt<6>(Offset) && "Offset is out of range");
   MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+  return false;
 }
 
 Register AVRRegisterInfo::getFrameRegister(const MachineFunction &MF) const {

diff  --git a/llvm/lib/Target/AVR/AVRRegisterInfo.h b/llvm/lib/Target/AVR/AVRRegisterInfo.h
index 2c5647b52c1cd..8eb0cf3039bbd 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.h
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.h
@@ -37,7 +37,7 @@ class AVRRegisterInfo : public AVRGenRegisterInfo {
                             const MachineFunction &MF) const override;
 
   /// Stack Frame Processing Methods
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
index 8de81a469b844..9bd39fd285a0f 100644
--- a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -53,7 +53,7 @@ static void WarnSize(int Offset, MachineFunction &MF, DebugLoc& DL)
   }
 }
 
-void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                           int SPAdj, unsigned FIOperandNum,
                                           RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
@@ -90,7 +90,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg)
         .addReg(reg)
         .addImm(Offset);
-    return;
+    return false;
   }
 
   int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
@@ -119,6 +119,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     MI.getOperand(i).ChangeToRegister(FrameReg, false);
     MI.getOperand(i + 1).ChangeToImmediate(Offset);
   }
+  return false;
 }
 
 Register BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const {

diff  --git a/llvm/lib/Target/BPF/BPFRegisterInfo.h b/llvm/lib/Target/BPF/BPFRegisterInfo.h
index e7b870b720a4e..f7dea75ebea6f 100644
--- a/llvm/lib/Target/BPF/BPFRegisterInfo.h
+++ b/llvm/lib/Target/BPF/BPFRegisterInfo.h
@@ -28,7 +28,7 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
 
   BitVector getReservedRegs(const MachineFunction &MF) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
index 4f7811d22868e..acddb7a4dbdbc 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
@@ -179,7 +179,7 @@ static bool IsLegalOffset(const CSKYInstrInfo *TII, MachineInstr *MI,
   return false;
 }
 
-void CSKYRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool CSKYRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                            int SPAdj, unsigned FIOperandNum,
                                            RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
@@ -287,4 +287,5 @@ void CSKYRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
         .ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
     MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
   }
+  return false;
 }

diff  --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.h b/llvm/lib/Target/CSKY/CSKYRegisterInfo.h
index 5b3b62ec0db27..0f2da40afbff0 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.h
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.h
@@ -35,7 +35,7 @@ class CSKYRegisterInfo : public CSKYGenRegisterInfo {
 
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS) const override;
 

diff  --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 4495112c7c053..8a84f55e33cdc 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -205,7 +205,7 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
   return Reserved;
 }
 
-void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                               int SPAdj, unsigned FIOp,
                                               RegScavenger *RS) const {
   static unsigned ReuseCount = 0;
@@ -234,7 +234,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       MI.setDesc(HII.get(Hexagon::A2_addi));
       MI.getOperand(FIOp).ChangeToImmediate(RealOffset);
       MI.removeOperand(FIOp+1);
-      return;
+      return false;
     case Hexagon::PS_fi:
       // Set up the instruction for updating below.
       MI.setDesc(HII.get(Hexagon::A2_addi));
@@ -346,6 +346,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   MI.getOperand(FIOp).ChangeToRegister(BP, false, false, false);
   MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset);
+  return false;
 }
 
 

diff  --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index 572da8dc907c0..4766fb5a84975 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -38,7 +38,7 @@ class HexagonRegisterInfo : public HexagonGenRegisterInfo {
 
   BitVector getReservedRegs(const MachineFunction &MF) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
         unsigned FIOperandNum, RegScavenger *RS = nullptr) const override;
 
   /// Returns true since we may need scavenging for a temporary register

diff  --git a/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp b/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
index 03cf102051730..5450bdb307648 100644
--- a/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -128,7 +128,7 @@ static unsigned getRRMOpcodeVariant(unsigned Opcode) {
   }
 }
 
-void LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                             int SPAdj, unsigned FIOperandNum,
                                             RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
@@ -200,7 +200,7 @@ void LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
           .addReg(Reg)
           .addImm(LPCC::ICC_T);
       MI.eraseFromParent();
-      return;
+      return true;
     }
     if (isSPLSOpcode(MI.getOpcode()) || isRMOpcode(MI.getOpcode())) {
       MI.setDesc(TII->get(getRRMOpcodeVariant(MI.getOpcode())));
@@ -218,7 +218,7 @@ void LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     MI.getOperand(FIOperandNum + 1)
         .ChangeToRegister(Reg, /*isDef=*/false, /*isImp=*/false,
                           /*isKill=*/true);
-    return;
+    return false;
   }
 
   // ALU arithmetic ops take unsigned immediates. If the offset is negative,
@@ -239,6 +239,7 @@ void LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/false);
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
   }
+  return false;
 }
 
 bool LanaiRegisterInfo::hasBasePointer(const MachineFunction &MF) const {

diff  --git a/llvm/lib/Target/Lanai/LanaiRegisterInfo.h b/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
index cbc95b273e1df..5168dddd93019 100644
--- a/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
@@ -34,7 +34,7 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
 
   bool requiresRegisterScavenging(const MachineFunction &MF) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
index 8655c5bdf7816..f84303232875c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -111,7 +111,7 @@ LoongArchRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   return TFI->hasFP(MF) ? LoongArch::R22 : LoongArch::R3;
 }
 
-void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                                 int SPAdj,
                                                 unsigned FIOperandNum,
                                                 RegScavenger *RS) const {
@@ -155,7 +155,7 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
           .addReg(FrameReg)
           .addReg(ScratchReg, RegState::Kill);
       MI.eraseFromParent();
-      return;
+      return true;
     }
     BuildMI(MBB, II, DL, TII->get(Add), ScratchReg)
         .addReg(FrameReg)
@@ -175,7 +175,7 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
         .addReg(FrameReg)
         .addImm(Offset.getFixed());
     MI.eraseFromParent();
-    return;
+    return true;
   }
 
   // Reload CFRs.
@@ -189,10 +189,11 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
         .add(MI.getOperand(0))
         .addReg(ScratchReg, RegState::Kill);
     MI.eraseFromParent();
-    return;
+    return true;
   }
 
   MI.getOperand(FIOperandNum)
       .ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
   MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
+  return false;
 }

diff  --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
index e9e4c88fe4c8a..7e8f26b140976 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
@@ -38,7 +38,7 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo {
     return &LoongArch::GPRRegClass;
   }
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
index 5b632299fa4c8..d12705bc935c7 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
@@ -162,7 +162,7 @@ BitVector M68kRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-void M68kRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool M68kRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                            int SPAdj, unsigned FIOperandNum,
                                            RegScavenger *RS) const {
   MachineInstr &MI = *II;
@@ -208,6 +208,7 @@ void M68kRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     FIOffset += SPAdj;
 
   Disp.ChangeToImmediate(FIOffset + Imm);
+  return false;
 }
 
 bool M68kRegisterInfo::requiresRegisterScavenging(

diff  --git a/llvm/lib/Target/M68k/M68kRegisterInfo.h b/llvm/lib/Target/M68k/M68kRegisterInfo.h
index fc55e19a958be..0cfab150d0c84 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.h
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.h
@@ -87,7 +87,7 @@ class M68kRegisterInfo : public M68kGenRegisterInfo {
 
   /// FrameIndex represent objects inside a abstract stack. We must replace
   /// FrameIndex with an stack/frame pointer direct reference.
-  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 5583ebee2f311..4ee40f47b8c19 100644
--- a/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -99,7 +99,7 @@ MSP430RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
   return &MSP430::GR16RegClass;
 }
 
-void
+bool
 MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
@@ -136,7 +136,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
 
     if (Offset == 0)
-      return;
+      return false;
 
     // We need to materialize the offset via add instruction.
     Register DstReg = MI.getOperand(0).getReg();
@@ -147,11 +147,12 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       BuildMI(MBB, std::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
         .addReg(DstReg).addImm(Offset);
 
-    return;
+    return false;
   }
 
   MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
   MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+  return false;
 }
 
 Register MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {

diff  --git a/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
index c3eff93f55d27..78b02cf8ecc0d 100644
--- a/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -32,7 +32,7 @@ struct MSP430RegisterInfo : public MSP430GenRegisterInfo {
   getPointerRegClass(const MachineFunction &MF,
                      unsigned Kind = 0) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 390ab9d22024b..e1fa033797769 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -247,7 +247,7 @@ MipsRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
 // FrameIndex represent objects inside a abstract stack.
 // We must replace FrameIndex with an stack/frame pointer
 // direct reference.
-void MipsRegisterInfo::
+bool MipsRegisterInfo::
 eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                     unsigned FIOperandNum, RegScavenger *RS) const {
   MachineInstr &MI = *II;
@@ -269,6 +269,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                     << "\n");
 
   eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
+  return false;
 }
 
 Register MipsRegisterInfo::

diff  --git a/llvm/lib/Target/Mips/MipsRegisterInfo.h b/llvm/lib/Target/Mips/MipsRegisterInfo.h
index 06f214c2d6b12..7eaab8d1d2060 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -59,7 +59,7 @@ class MipsRegisterInfo : public MipsGenRegisterInfo {
   bool requiresRegisterScavenging(const MachineFunction &MF) const override;
 
   /// Stack Frame Processing Methods
-  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index ec7307265bcaf..291572ba58efd 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -118,7 +118,7 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                             int SPAdj, unsigned FIOperandNum,
                                             RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
@@ -133,6 +133,7 @@ void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // Using I0 as the frame pointer
   MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
   MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+  return false;
 }
 
 Register NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {

diff  --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
index c6dd647f46373..57218b9d60af8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -38,7 +38,7 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
 
   BitVector getReservedRegs(const MachineFunction &MF) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index ec1307988bf60..a94378a9b1cd7 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1485,7 +1485,7 @@ static unsigned getOffsetONFromFION(const MachineInstr &MI,
   return OffsetOperandNo;
 }
 
-void
+bool
 PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                      int SPAdj, unsigned FIOperandNum,
                                      RegScavenger *RS) const {
@@ -1518,14 +1518,16 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   if ((OpC == PPC::DYNAREAOFFSET || OpC == PPC::DYNAREAOFFSET8)) {
     lowerDynamicAreaOffset(II);
-    return;
+    // lowerDynamicAreaOffset erases II
+    return true;
   }
 
   // Special case for dynamic alloca.
   if (FPSI && FrameIndex == FPSI &&
       (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
     lowerDynamicAlloc(II);
-    return;
+    // lowerDynamicAlloc erases II
+    return true;
   }
 
   if (FPSI && FrameIndex == FPSI &&
@@ -1534,37 +1536,38 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
        OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 ||
        OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32)) {
     lowerPrepareProbedAlloca(II);
-    return;
+    // lowerPrepareProbedAlloca erases II
+    return true;
   }
 
   // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
   if (OpC == PPC::SPILL_CR) {
     lowerCRSpilling(II, FrameIndex);
-    return;
+    return true;
   } else if (OpC == PPC::RESTORE_CR) {
     lowerCRRestore(II, FrameIndex);
-    return;
+    return true;
   } else if (OpC == PPC::SPILL_CRBIT) {
     lowerCRBitSpilling(II, FrameIndex);
-    return;
+    return true;
   } else if (OpC == PPC::RESTORE_CRBIT) {
     lowerCRBitRestore(II, FrameIndex);
-    return;
+    return true;
   } else if (OpC == PPC::SPILL_ACC || OpC == PPC::SPILL_UACC) {
     lowerACCSpilling(II, FrameIndex);
-    return;
+    return true;
   } else if (OpC == PPC::RESTORE_ACC || OpC == PPC::RESTORE_UACC) {
     lowerACCRestore(II, FrameIndex);
-    return;
+    return true;
   } else if (OpC == PPC::STXVP && DisableAutoPairedVecSt) {
     lowerOctWordSpilling(II, FrameIndex);
-    return;
+    return true;
   } else if (OpC == PPC::SPILL_QUADWORD) {
     lowerQuadwordSpilling(II, FrameIndex);
-    return;
+    return true;
   } else if (OpC == PPC::RESTORE_QUADWORD) {
     lowerQuadwordRestore(II, FrameIndex);
-    return;
+    return true;
   }
 
   // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
@@ -1621,7 +1624,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                      OpC == TargetOpcode::STACKMAP ||
                      OpC == TargetOpcode::PATCHPOINT)) {
     MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
-    return;
+    return false;
   }
 
   // The offset doesn't fit into a single register, scavenge one to build the
@@ -1709,6 +1712,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     MI.getOperand(OperandBase + 1).ChangeToRegister(NewReg, false);
     MI.getOperand(OperandBase).ChangeToImmediate(0);
   }
+  return false;
 }
 
 Register PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {

diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index fea1a3afab1c2..069163fed8764 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -147,7 +147,7 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
 
   bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
                             int &FrameIdx) const override;
-  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 5066c0ced0ce9..48e2b2aa2051b 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -155,7 +155,7 @@ bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
   return true;
 }
 
-void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                             int SPAdj, unsigned FIOperandNum,
                                             RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
@@ -231,7 +231,7 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       assert(MI.getOperand(0).getReg() == ScratchReg &&
              "Expected to have written ADDI destination register");
       MI.eraseFromParent();
-      return;
+      return true;
     }
 
     Offset = StackOffset::get(0, Offset.getScalable());
@@ -250,7 +250,7 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       assert(MI.getOperand(0).getReg() == DestReg &&
              "Expected to have written ADDI destination register");
       MI.eraseFromParent();
-      return;
+      return true;
     }
     FrameReg = DestReg;
     FrameRegIsKill = true;
@@ -293,6 +293,7 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     // the length of vint32m2_t.
     MI.getOperand(FIOperandNum + 1).ChangeToRegister(VL, /*isDef=*/false);
   }
+  return false;
 }
 
 Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {

diff  --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 1c66e94871f8f..aa199de38d26b 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -38,7 +38,7 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
   bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
                             int &FrameIdx) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h
index f6f22b81e0bc0..4b2c621f27810 100644
--- a/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h
@@ -24,9 +24,11 @@ struct SPIRVRegisterInfo : public SPIRVGenRegisterInfo {
   SPIRVRegisterInfo();
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
   BitVector getReservedRegs(const MachineFunction &MF) const override;
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
-                           RegScavenger *RS = nullptr) const override {}
+                           RegScavenger *RS = nullptr) const override {
+    return false;
+  }
   Register getFrameRegister(const MachineFunction &MF) const override {
     return 0;
   }

diff  --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index 21dced23210c5..f97bf57627d1a 100644
--- a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -160,7 +160,7 @@ static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
 }
 
 
-void
+bool
 SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
@@ -209,7 +209,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   }
 
   replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg);
-
+  // replaceFI never removes II
+  return false;
 }
 
 Register SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {

diff  --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index 118ef9d80fae7..5b3c1a7ad07dd 100644
--- a/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -34,7 +34,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
   const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
                                                 unsigned Kind) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index be65fe55c6344..28d01d53ccc5f 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -282,7 +282,7 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-void
+bool
 SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                          int SPAdj, unsigned FIOperandNum,
                                          RegScavenger *RS) const {
@@ -314,7 +314,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       MI->getDebugExpressionOp().setMetadata(
           DIExpression::appendOpsToArg(MI->getDebugExpression(), Ops, OpIdx));
     }
-    return;
+    return false;
   }
 
   // See if the offset is in range, or if an equivalent instruction that
@@ -374,6 +374,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   }
   MI->setDesc(TII->get(OpcodeForOffset));
   MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+  return false;
 }
 
 bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,

diff  --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index db0936f3f56b8..19305d4e89573 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -160,7 +160,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID CC) const override;
   BitVector getReservedRegs(const MachineFunction &MF) const override;
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS) const override;
 

diff  --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp
index 70e5e6e939e41..dfdddf095e81e 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -477,7 +477,7 @@ void EliminateFrameIndex::processMI(MachineInstr &MI, Register FrameReg,
   replaceFI(MI, FrameReg, Offset, FIOperandNum);
 }
 
-void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+bool VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          int SPAdj, unsigned FIOperandNum,
                                          RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
@@ -500,6 +500,7 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   Offset += MI.getOperand(FIOperandNum + offsetToDisp(MI)).getImm();
 
   EFI.processMI(MI, FrameReg, Offset, FIOperandNum);
+  return false;
 }
 
 Register VERegisterInfo::getFrameRegister(const MachineFunction &MF) const {

diff  --git a/llvm/lib/Target/VE/VERegisterInfo.h b/llvm/lib/Target/VE/VERegisterInfo.h
index bc1838f165a57..3f6feedf42534 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.h
+++ b/llvm/lib/Target/VE/VERegisterInfo.h
@@ -34,7 +34,7 @@ struct VERegisterInfo : public VEGenRegisterInfo {
   const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
                                                 unsigned Kind) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 6b6394a583393..7048f8926508d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -50,7 +50,7 @@ WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction & /*MF*/) const {
   return Reserved;
 }
 
-void WebAssemblyRegisterInfo::eliminateFrameIndex(
+bool WebAssemblyRegisterInfo::eliminateFrameIndex(
     MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
     RegScavenger * /*RS*/) const {
   assert(SPAdj == 0);
@@ -82,7 +82,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
       MI.getOperand(OffsetOperandNum).setImm(Offset);
       MI.getOperand(FIOperandNum)
           .ChangeToRegister(FrameRegister, /*isDef=*/false);
-      return;
+      return false;
     }
   }
 
@@ -105,7 +105,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
             ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
             MI.getOperand(FIOperandNum)
                 .ChangeToRegister(FrameRegister, /*isDef=*/false);
-            return;
+            return false;
           }
         }
       }
@@ -133,6 +133,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
         .addReg(OffsetOp);
   }
   MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*isDef=*/false);
+  return false;
 }
 
 Register

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
index 7880eb217dbf3..d875e4b93603b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -34,7 +34,7 @@ class WebAssemblyRegisterInfo final : public WebAssemblyGenRegisterInfo {
   // Code Generation virtual methods.
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
   BitVector getReservedRegs(const MachineFunction &MF) const override;
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index f2658f70434b7..dc1a3a225caf0 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -782,7 +782,7 @@ static bool isFuncletReturnInstr(MachineInstr &MI) {
   llvm_unreachable("impossible");
 }
 
-void
+bool
 X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                      int SPAdj, unsigned FIOperandNum,
                                      RegScavenger *RS) const {
@@ -819,7 +819,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   if (Opc == TargetOpcode::LOCAL_ESCAPE) {
     MachineOperand &FI = MI.getOperand(FIOperandNum);
     FI.ChangeToImmediate(FIOffset);
-    return;
+    return false;
   }
 
   // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit
@@ -843,7 +843,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     assert(BasePtr == FramePtr && "Expected the FP as base register");
     int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
-    return;
+    return false;
   }
 
   if (MI.getOperand(FIOperandNum+3).isImm()) {
@@ -860,6 +860,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
     MI.getOperand(FIOperandNum + 3).setOffset(Offset);
   }
+  return false;
 }
 
 unsigned X86RegisterInfo::findDeadCallerSavedReg(

diff  --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 6f4fb405d29fc..f88d4b18f1d86 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,7 +133,7 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
 
   bool canRealignStack(const MachineFunction &MF) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
index 0d1ba39b8b10b..ed5a0ad5d4b83 100644
--- a/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -250,7 +250,7 @@ XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
   return false;
 }
 
-void
+bool
 XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
@@ -284,7 +284,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   if (MI.isDebugValue()) {
     MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
-    return;
+    return false;
   }
 
   // fold constant into offset.
@@ -313,6 +313,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // Erase old instruction.
   MachineBasicBlock &MBB = *MI.getParent();
   MBB.erase(II);
+  return true;
 }
 
 

diff  --git a/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/llvm/lib/Target/XCore/XCoreRegisterInfo.h
index f1eec7bc87b43..b72875c29c343 100644
--- a/llvm/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -34,7 +34,7 @@ struct XCoreRegisterInfo : public XCoreGenRegisterInfo {
 
   bool useFPForScavengingIndex(const MachineFunction &MF) const override;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
index 54fc4ddd72ff6..a607ccb6946a0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -155,52 +155,52 @@ define amdgpu_kernel void @kernel_caller_byval() {
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:8
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:72
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_lo offset:8
 ; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16
+; FLATSCR-NEXT:    s_mov_b32 s11, 0
+; FLATSCR-NEXT:    s_mov_b32 s10, 0
+; FLATSCR-NEXT:    s_mov_b32 s9, 0
+; FLATSCR-NEXT:    s_mov_b32 s8, 0
+; FLATSCR-NEXT:    s_mov_b32 s7, 0
+; FLATSCR-NEXT:    s_mov_b32 s6, 0
+; FLATSCR-NEXT:    s_mov_b32 s5, 0
+; FLATSCR-NEXT:    s_mov_b32 s1, 0
+; FLATSCR-NEXT:    s_mov_b32 s0, 0
+; FLATSCR-NEXT:    s_mov_b32 s4, 0
+; FLATSCR-NEXT:    s_mov_b32 s3, 0
+; FLATSCR-NEXT:    s_mov_b32 s2, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:80
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:24
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:88
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:32
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:96
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:40
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:104
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:48
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:112
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:56
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:120
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:64
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:128
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s33 offset:8
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[2:3], off, s33 offset:16
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[4:5], off, s33 offset:24
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[6:7], off, s33 offset:32
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[8:9], off, s33 offset:40
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s33 offset:48
-; FLATSCR-NEXT:    s_mov_b32 s33, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[12:13], off, s33 offset:56
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s11 offset:24
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s10 offset:32
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s9 offset:40
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s8 offset:48
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s7 offset:56
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s6 offset:64
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s5 offset:72
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s1 offset:80
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s0 offset:88
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s4 offset:96
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s3 offset:104
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s2 offset:112
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_lo offset:120
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:128
+; FLATSCR-NEXT:    s_mov_b32 s40, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s40 offset:8
+; FLATSCR-NEXT:    s_mov_b32 s39, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[2:3], off, s39 offset:16
+; FLATSCR-NEXT:    s_mov_b32 s38, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[4:5], off, s38 offset:24
+; FLATSCR-NEXT:    s_mov_b32 s37, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[6:7], off, s37 offset:32
+; FLATSCR-NEXT:    s_mov_b32 s36, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[8:9], off, s36 offset:40
+; FLATSCR-NEXT:    s_mov_b32 s35, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s35 offset:48
+; FLATSCR-NEXT:    s_mov_b32 s34, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[12:13], off, s34 offset:56
 ; FLATSCR-NEXT:    s_mov_b32 s33, 0
 ; FLATSCR-NEXT:    scratch_load_dwordx2 v[14:15], off, s33 offset:64
 ; FLATSCR-NEXT:    s_movk_i32 s32, 0x50

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
index cfa56f05fac7a..1e4438526779a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
@@ -308,10 +308,10 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload
 ; GCN-NEXT:    v_bfe_u32 v0, v6, 1, 6
-; GCN-NEXT:    v_lshrrev_b32_e64 v5, 6, s33
+; GCN-NEXT:    v_lshrrev_b32_e64 v2, 6, s33
 ; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GCN-NEXT:    v_add_u32_e32 v5, 0x100, v5
-; GCN-NEXT:    v_add_u32_e32 v0, v5, v0
+; GCN-NEXT:    v_add_u32_e32 v2, 0x100, v2
+; GCN-NEXT:    v_add_u32_e32 v0, v2, v0
 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v6
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -487,10 +487,10 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload
 ; GCN-NEXT:    v_and_b32_e32 v0, 31, v6
-; GCN-NEXT:    v_lshrrev_b32_e64 v5, 6, s33
+; GCN-NEXT:    v_lshrrev_b32_e64 v2, 6, s33
 ; GCN-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GCN-NEXT:    v_add_u32_e32 v5, 0x100, v5
-; GCN-NEXT:    v_add_u32_e32 v1, v5, v0
+; GCN-NEXT:    v_add_u32_e32 v2, 0x100, v2
+; GCN-NEXT:    v_add_u32_e32 v1, v2, v0
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v16, v20
 ; GCN-NEXT:    v_mov_b32_e32 v17, v21

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index 8d0733ea0037f..7d0029e9efa56 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -458,9 +458,9 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX9-NEXT:    scratch_load_dword v1, off, s32 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x100
+; GFX9-NEXT:    s_add_i32 vcc_lo, s32, 0x100
 ; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX9-NEXT:    v_add_u32_e32 v1, vcc_hi, v1
+; GFX9-NEXT:    v_add_u32_e32 v1, vcc_lo, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x100
@@ -477,13 +477,13 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    s_add_i32 s0, s32, 0x100
 ; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x100
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
+; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, vcc_lo, v0
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x100
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, vcc_lo, v1
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -585,16 +585,16 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_mov_b32_e32 v0, 15
+; GFX940-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
 ; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX940-NEXT:    s_and_b32 s0, s0, 15
 ; GFX940-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    scratch_store_dword v1, v0, vcc_hi sc0 sc1
+; GFX940-NEXT:    scratch_store_dword v1, v0, vcc_lo sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_mov_b32_e32 v0, s0
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
 ; GFX940-NEXT:    scratch_load_dword v0, v0, vcc_hi sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_endpgm
@@ -613,9 +613,9 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    v_mov_b32_e32 v2, s0
-; GFX11-NEXT:    scratch_store_b32 v0, v1, vcc_lo dlc
+; GFX11-NEXT:    s_movk_i32 s0, 0x4004
+; GFX11-NEXT:    scratch_store_b32 v0, v1, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX11-NEXT:    scratch_load_b32 v0, v2, vcc_lo glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_endpgm
@@ -681,9 +681,9 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_lo sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
@@ -696,13 +696,13 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 15
+; GFX11-NEXT:    s_movk_i32 s0, 0x4004
 ; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-NEXT:    scratch_load_b32 v3, off, off offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX11-NEXT:    scratch_store_b32 v0, v2, vcc_lo dlc
+; GFX11-NEXT:    scratch_store_b32 v0, v2, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX11-NEXT:    scratch_load_b32 v0, v1, vcc_lo offset:124 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_endpgm
@@ -731,9 +731,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX9-NEXT:    scratch_load_dword v1, off, s32 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX9-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX9-NEXT:    v_add_u32_e32 v1, vcc_hi, v1
+; GFX9-NEXT:    v_add_u32_e32 v1, vcc_lo, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
@@ -750,13 +750,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
+; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, vcc_lo, v0
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, vcc_lo, v1
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -771,9 +771,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_lo sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
@@ -787,13 +787,14 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-NEXT:    scratch_load_b32 v3, off, s32 offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX11-NEXT:    scratch_store_b32 v0, v2, vcc_lo dlc
+; GFX11-NEXT:    scratch_store_b32 v0, v2, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX11-NEXT:    scratch_load_b32 v0, v1, vcc_lo glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]

diff  --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
index 1efddc59e0454..152e73d1b803b 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
@@ -36,6 +36,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr32_restore_clobber_scc
   ; GFX90A: bb.0:
@@ -280,7 +282,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -527,6 +529,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr32_restore_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -772,7 +776,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -1010,6 +1014,7 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_ENDPGM 0, amdgpu_allvgprs
 ...
 
@@ -1048,6 +1053,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr64_restore_clobber_scc
   ; GFX90A: bb.0:
@@ -1293,7 +1300,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -1542,6 +1549,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr64_restore_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -1787,7 +1796,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -2025,6 +2034,7 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_ENDPGM 0, amdgpu_allvgprs
 ...
 
@@ -2065,6 +2075,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr96_restore_clobber_scc
   ; GFX90A: bb.0:
@@ -2311,7 +2323,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -2562,6 +2574,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr96_restore_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -2807,7 +2821,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -3045,6 +3059,7 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_ENDPGM 0, amdgpu_allvgprs
 ...
 
@@ -3081,6 +3096,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr32_save_clobber_scc
   ; GFX90A: bb.0:
@@ -3325,7 +3342,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $agpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -3572,6 +3589,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr32_save_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -3817,7 +3836,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $agpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -4055,6 +4074,7 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
     S_ENDPGM 0, amdgpu_allvgprs
 ...
 
@@ -4092,6 +4112,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr64_save_clobber_scc
   ; GFX90A: bb.0:
@@ -4337,7 +4359,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -4586,6 +4608,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr64_save_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -4831,7 +4855,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -5069,6 +5093,7 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
     S_ENDPGM 0, amdgpu_allvgprs
 ...
 ---
@@ -5107,6 +5132,8 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
+  ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-LABEL: name: agpr96_save_clobber_scc
   ; GFX90A: bb.0:
@@ -5353,7 +5380,7 @@ body:             |
   ; GFX90A-NEXT:   S_NOP 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
-  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
@@ -5604,6 +5631,8 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
+  ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ; GFX90A-FLATSCR-LABEL: name: agpr96_save_clobber_scc
   ; GFX90A-FLATSCR: bb.0:
@@ -5849,7 +5878,7 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   S_NOP 0
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.2:
-  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+  ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
@@ -6087,5 +6116,6 @@ body:             |
     S_NOP 0
 
   bb.2:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
     S_ENDPGM 0, amdgpu_allvgprs
 ...

diff  --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index 0a234a5c4cb02..44e5a0b0000a7 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -495,25 +495,25 @@ define amdgpu_kernel void @vload2_private(i16 addrspace(1)* nocapture readonly %
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
 ; FLATSCR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v2, 0
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    s_mov_b32 s5, 0
+; FLATSCR-NEXT:    s_mov_b32 s4, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLATSCR-NEXT:    global_load_ushort v0, v2, s[0:1]
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_store_short off, v0, vcc_hi offset:4
+; FLATSCR-NEXT:    scratch_store_short off, v0, s5 offset:4
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    global_load_ushort v0, v2, s[0:1] offset:2
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_store_short off, v0, vcc_hi offset:6
+; FLATSCR-NEXT:    scratch_store_short off, v0, s4 offset:6
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    global_load_ushort v0, v2, s[0:1] offset:4
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_store_short off, v0, vcc_hi offset:8
+; FLATSCR-NEXT:    scratch_store_short off, v0, s0 offset:8
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4
 ; FLATSCR-NEXT:    scratch_load_dword v1, off, vcc_hi offset:6
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
@@ -558,29 +558,27 @@ define amdgpu_kernel void @vload2_private(i16 addrspace(1)* nocapture readonly %
 ; FLATSCR_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
 ; FLATSCR_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; FLATSCR_GFX10-NEXT:    v_mov_b32_e32 v2, 0
+; FLATSCR_GFX10-NEXT:    s_mov_b32 s5, 0
+; FLATSCR_GFX10-NEXT:    s_mov_b32 s4, 0
 ; FLATSCR_GFX10-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLATSCR_GFX10-NEXT:    global_load_ushort v0, v2, s[0:1]
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, vcc_lo offset:4
+; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s5 offset:4
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; FLATSCR_GFX10-NEXT:    global_load_ushort v0, v2, s[0:1] offset:2
-; FLATSCR_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; FLATSCR_GFX10-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, vcc_lo offset:6
+; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s4 offset:6
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; FLATSCR_GFX10-NEXT:    global_load_ushort v0, v2, s[0:1] offset:4
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; FLATSCR_GFX10-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR_GFX10-NEXT:    s_mov_b32 s1, 0
+; FLATSCR_GFX10-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, vcc_lo offset:8
+; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s1 offset:8
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; FLATSCR_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; FLATSCR_GFX10-NEXT:    s_mov_b32 vcc_lo, 0
-; FLATSCR_GFX10-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4
-; FLATSCR_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; FLATSCR_GFX10-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR_GFX10-NEXT:    s_clause 0x1
+; FLATSCR_GFX10-NEXT:    scratch_load_dword v0, off, s0 offset:4
 ; FLATSCR_GFX10-NEXT:    scratch_load_dword v1, off, vcc_lo offset:6
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR_GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]

diff  --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index 9edfbefa7fcde..91f3e3581fa2c 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -121,16 +121,16 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
 ; FLAT_SCR_OPT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; FLAT_SCR_OPT-NEXT:    s_mov_b32 s104, exec_lo
 ; FLAT_SCR_OPT-NEXT:    s_mov_b32 exec_lo, 3
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 s105, 0
-; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v72, s105
+; FLAT_SCR_OPT-NEXT:    s_mov_b32 s4, 0
+; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v72, s4
 ; FLAT_SCR_OPT-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLAT_SCR_OPT-NEXT:    v_writelane_b32 v72, s2, 0
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 s105, 4
+; FLAT_SCR_OPT-NEXT:    s_mov_b32 s4, 4
 ; FLAT_SCR_OPT-NEXT:    v_writelane_b32 v72, s3, 1
-; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v72, s105 ; 4-byte Folded Spill
+; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v72, s4 ; 4-byte Folded Spill
 ; FLAT_SCR_OPT-NEXT:    s_waitcnt_depctr 0xffe3
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 s105, 0
-; FLAT_SCR_OPT-NEXT:    scratch_load_dword v72, off, s105
+; FLAT_SCR_OPT-NEXT:    s_mov_b32 s4, 0
+; FLAT_SCR_OPT-NEXT:    scratch_load_dword v72, off, s4
 ; FLAT_SCR_OPT-NEXT:    s_waitcnt vmcnt(0)
 ; FLAT_SCR_OPT-NEXT:    s_waitcnt_depctr 0xffe3
 ; FLAT_SCR_OPT-NEXT:    s_mov_b32 exec_lo, s104
@@ -255,16 +255,16 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
 ; FLAT_SCR_ARCH-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s104, exec_lo
 ; FLAT_SCR_ARCH-NEXT:    s_mov_b32 exec_lo, 3
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s105, 0
-; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v72, s105
+; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s4, 0
+; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v72, s4
 ; FLAT_SCR_ARCH-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLAT_SCR_ARCH-NEXT:    v_writelane_b32 v72, s2, 0
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s105, 4
+; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s4, 4
 ; FLAT_SCR_ARCH-NEXT:    v_writelane_b32 v72, s3, 1
-; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v72, s105 ; 4-byte Folded Spill
+; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v72, s4 ; 4-byte Folded Spill
 ; FLAT_SCR_ARCH-NEXT:    s_waitcnt_depctr 0xffe3
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s105, 0
-; FLAT_SCR_ARCH-NEXT:    scratch_load_dword v72, off, s105
+; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s4, 0
+; FLAT_SCR_ARCH-NEXT:    scratch_load_dword v72, off, s4
 ; FLAT_SCR_ARCH-NEXT:    s_waitcnt vmcnt(0)
 ; FLAT_SCR_ARCH-NEXT:    s_waitcnt_depctr 0xffe3
 ; FLAT_SCR_ARCH-NEXT:    s_mov_b32 exec_lo, s104

diff  --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index bb3133d99a157..d00a446ae0397 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -21,13 +21,13 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_mov_b32 s1, 0
+; GFX9-NEXT:    s_mov_b32 s0, 0
+; GFX9-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:52
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:36
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:20
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:52
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:36
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20
 ; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -74,6 +74,7 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
 ; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-PAL-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
@@ -86,12 +87,11 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:52
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:36
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:20
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:52
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:36
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
@@ -129,15 +129,12 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:52
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:36
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:52
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:36
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:20
 ; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:4
 ; GFX1010-PAL-NEXT:    s_endpgm
 ;
@@ -971,8 +968,8 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s4, 0
+; GFX9-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    s_mov_b32 s1, s0
@@ -982,13 +979,13 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_mov_b32 s1, 0
+; GFX9-NEXT:    s_mov_b32 s0, 0
+; GFX9-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:260
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:276
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:292
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:260
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:276
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292
 ; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -1038,13 +1035,15 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s4, 0
 ; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-PAL-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
@@ -1053,13 +1052,11 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:260
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:276
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:292
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:260
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:276
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
@@ -1090,9 +1087,9 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX1010-PAL-NEXT:    s_addc_u32 s3, s3, 0
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s4, 0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4 glc dlc
+; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1010-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s2, s0
@@ -1101,16 +1098,13 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1010-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
 ; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:260
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:276
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:260
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:276
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:292
 ; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:308
 ; GFX1010-PAL-NEXT:    s_endpgm
 ;
@@ -1888,13 +1882,13 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-NEXT:    s_add_i32 s0, s32, 0x100
 ; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, vcc_lo
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
@@ -1953,13 +1947,13 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-PAL-NEXT:    s_add_i32 s0, s32, 0x100
 ; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, vcc_lo
-; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-PAL-NEXT:    scratch_load_dword v3, off, s32 glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-PAL-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, v1, off glc dlc
@@ -2015,8 +2009,8 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s4, 0
+; GFX9-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    s_mov_b32 s1, s0
@@ -2026,13 +2020,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_movk_i32 s1, 0x4004
+; GFX9-NEXT:    s_movk_i32 s0, 0x4004
+; GFX9-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX9-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX9-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX9-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX9-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -2053,12 +2047,12 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s3
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX10-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX10-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX10-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX10-NEXT:    s_movk_i32 s2, 0x4004
+; GFX10-NEXT:    s_movk_i32 s1, 0x4004
+; GFX10-NEXT:    s_movk_i32 s0, 0x4004
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
 ; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -2073,12 +2067,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX11-NEXT:    s_mov_b32 s3, s0
 ; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:16
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:32
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX11-NEXT:    s_movk_i32 s2, 0x4004
+; GFX11-NEXT:    s_movk_i32 s1, 0x4004
+; GFX11-NEXT:    s_movk_i32 s0, 0x4004
+; GFX11-NEXT:    s_clause 0x3
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s2
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
 ; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
@@ -2088,13 +2083,15 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s4, 0
 ; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX9-PAL-NEXT:    s_movk_i32 vcc_hi, 0x4004
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-PAL-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
@@ -2103,13 +2100,11 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-PAL-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX9-PAL-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX9-PAL-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX9-PAL-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX9-PAL-NEXT:    s_movk_i32 s1, 0x4004
+; GFX9-PAL-NEXT:    s_movk_i32 s0, 0x4004
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
@@ -2123,13 +2118,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX940-NEXT:    s_mov_b32 s3, s0
 ; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
 ; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    s_movk_i32 s1, 0x4004
+; GFX940-NEXT:    s_movk_i32 s0, 0x4004
+; GFX940-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX940-NEXT:    s_endpgm
 ;
@@ -2144,9 +2139,9 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX1010-PAL-NEXT:    s_addc_u32 s3, s3, 0
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s4, 0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4 glc dlc
+; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, s4 offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1010-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s2, s0
@@ -2155,16 +2150,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1010-PAL-NEXT:    s_movk_i32 s2, 0x4004
+; GFX1010-PAL-NEXT:    s_movk_i32 s1, 0x4004
+; GFX1010-PAL-NEXT:    s_movk_i32 s0, 0x4004
 ; GFX1010-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
 ; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
 ; GFX1010-PAL-NEXT:    s_endpgm
 ;
@@ -2190,12 +2182,12 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX1030-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX1030-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX1030-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX1030-PAL-NEXT:    s_movk_i32 s2, 0x4004
+; GFX1030-PAL-NEXT:    s_movk_i32 s1, 0x4004
+; GFX1030-PAL-NEXT:    s_movk_i32 s0, 0x4004
+; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
+; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
+; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
 ; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
 ; GFX1030-PAL-NEXT:    s_endpgm
 ;
@@ -2210,12 +2202,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX11-PAL-NEXT:    s_mov_b32 s3, s0
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo
-; GFX11-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:16
-; GFX11-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:32
-; GFX11-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX11-PAL-NEXT:    s_movk_i32 s2, 0x4004
+; GFX11-PAL-NEXT:    s_movk_i32 s1, 0x4004
+; GFX11-PAL-NEXT:    s_movk_i32 s0, 0x4004
+; GFX11-PAL-NEXT:    s_clause 0x3
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s2
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
 ; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
 ; GFX11-PAL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-PAL-NEXT:    s_endpgm
@@ -2242,13 +2235,13 @@ define void @zero_init_large_offset_foo() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX9-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX9-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -2260,7 +2253,6 @@ define void @zero_init_large_offset_foo() {
 ; GFX10-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_mov_b32 s0, 0
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX10-NEXT:    s_mov_b32 s1, s0
 ; GFX10-NEXT:    s_mov_b32 s2, s0
 ; GFX10-NEXT:    s_mov_b32 s3, s0
@@ -2268,12 +2260,13 @@ define void @zero_init_large_offset_foo() {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s3
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
+; GFX10-NEXT:    s_add_i32 s2, s32, 0x4004
+; GFX10-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX10-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
 ; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -2285,18 +2278,20 @@ define void @zero_init_large_offset_foo() {
 ; GFX11-NEXT:    scratch_load_b32 v0, off, s32 offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_mov_b32 s0, 0
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_mov_b32 s1, s0
 ; GFX11-NEXT:    s_mov_b32 s2, s0
 ; GFX11-NEXT:    s_mov_b32 s3, s0
 ; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:16
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:32
+; GFX11-NEXT:    s_add_i32 s2, s32, 0x4004
+; GFX11-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX11-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-NEXT:    s_clause 0x3
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s2
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
 ; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -2314,13 +2309,13 @@ define void @zero_init_large_offset_foo() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-PAL-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX9-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX9-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
@@ -2336,69 +2331,41 @@ define void @zero_init_large_offset_foo() {
 ; GFX940-NEXT:    s_mov_b32 s3, s0
 ; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
 ; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX940-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
 ; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1010-PAL-LABEL: zero_init_large_offset_foo:
-; GFX1010-PAL:       ; %bb.0:
-; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc dlc
-; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX1010-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1010-PAL-NEXT:    s_mov_b32 s1, s0
-; GFX1010-PAL-NEXT:    s_mov_b32 s2, s0
-; GFX1010-PAL-NEXT:    s_mov_b32 s3, s0
-; GFX1010-PAL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX1010-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX1010-PAL-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
-; GFX1010-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1010-PAL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX1030-PAL-LABEL: zero_init_large_offset_foo:
-; GFX1030-PAL:       ; %bb.0:
-; GFX1030-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1030-PAL-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc dlc
-; GFX1030-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX1030-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX1030-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1030-PAL-NEXT:    s_mov_b32 s1, s0
-; GFX1030-PAL-NEXT:    s_mov_b32 s2, s0
-; GFX1030-PAL-NEXT:    s_mov_b32 s3, s0
-; GFX1030-PAL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX1030-PAL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1030-PAL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX1030-PAL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX1030-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX1030-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX1030-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
-; GFX1030-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1030-PAL-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-PAL-LABEL: zero_init_large_offset_foo:
+; GFX10-PAL:       ; %bb.0:
+; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-PAL-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc dlc
+; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX10-PAL-NEXT:    s_mov_b32 s1, s0
+; GFX10-PAL-NEXT:    s_mov_b32 s2, s0
+; GFX10-PAL-NEXT:    s_mov_b32 s3, s0
+; GFX10-PAL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-PAL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-PAL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX10-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-PAL-NEXT:    s_add_i32 s2, s32, 0x4004
+; GFX10-PAL-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX10-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
+; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-PAL-LABEL: zero_init_large_offset_foo:
 ; GFX11-PAL:       ; %bb.0:
@@ -2407,18 +2374,20 @@ define void @zero_init_large_offset_foo() {
 ; GFX11-PAL-NEXT:    scratch_load_b32 v0, off, s32 offset:4 glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-PAL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX11-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX11-PAL-NEXT:    s_mov_b32 s3, s0
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:16
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:32
+; GFX11-PAL-NEXT:    s_add_i32 s2, s32, 0x4004
+; GFX11-PAL-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX11-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-PAL-NEXT:    s_clause 0x3
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s2
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
 ; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    s_setpc_b64 s[30:31]
@@ -2986,13 +2955,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, vcc_lo
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
@@ -3005,13 +2974,14 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-NEXT:    scratch_load_b32 v3, off, s32 offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX11-NEXT:    scratch_store_b32 v0, v2, vcc_lo dlc
+; GFX11-NEXT:    scratch_store_b32 v0, v2, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX11-NEXT:    scratch_load_b32 v0, v1, vcc_lo glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -3040,9 +3010,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX940-NEXT:    v_mov_b32_e32 v2, 15
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX940-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_hi sc0 sc1
+; GFX940-NEXT:    scratch_store_dword v1, v2, vcc_lo sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
@@ -3055,13 +3025,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, vcc_lo
-; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-PAL-NEXT:    scratch_load_dword v3, off, s32 offset:4 glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
 ; GFX10-PAL-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, v1, off glc dlc
@@ -3074,13 +3044,14 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
 ; GFX11-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX11-PAL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-PAL-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-PAL-NEXT:    scratch_load_b32 v3, off, s32 offset:4 glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-PAL-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX11-PAL-NEXT:    scratch_store_b32 v0, v2, vcc_lo dlc
+; GFX11-PAL-NEXT:    scratch_store_b32 v0, v2, s0 dlc
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX11-PAL-NEXT:    scratch_load_b32 v0, v1, vcc_lo glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-PAL-NEXT:    s_setpc_b64 s[30:31]
@@ -4138,8 +4109,8 @@ define amdgpu_ps void @large_offset() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v3, v0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:3024
+; GFX9-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:3024
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-NEXT:    scratch_load_dwordx4 v[0:3], off, vcc_hi offset:3024 glc
@@ -4213,8 +4184,8 @@ define amdgpu_ps void @large_offset() {
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s0
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:3024
+; GFX9-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:3024
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-PAL-NEXT:    scratch_load_dwordx4 v[0:3], off, vcc_hi offset:3024 glc

diff  --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index 82bd45d71da14..bdf7364b74bed 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -89,8 +89,8 @@ body:             |
     ; GCN-LABEL: name: func_add_constant_to_fi_uniform_SCC_clobber_i32
     ; GCN: liveins: $sgpr30_sgpr31
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $vcc_hi = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc
-    ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $vcc_hi, 4, implicit-def $scc
+    ; GCN-NEXT: $vcc_lo = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc
+    ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $vcc_lo, 4, implicit-def $scc
     ; GCN-NEXT: renamable $sgpr5 = S_ADDC_U32 $sgpr4, 1234567, implicit-def $scc, implicit $scc
     ; GCN-NEXT: $vcc_hi = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
     ; GCN-NEXT: $vcc_hi = S_ADD_I32 killed $vcc_hi, 8, implicit-def $scc
@@ -184,3 +184,30 @@ body:             |
     S_SETPC_B64_return killed renamable $sgpr30_sgpr31
 
 ...
+---
+name: func_frame_idx_at_the_end_of_bb
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, offset: 0, size: 8, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 0, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+
+body:             |
+  bb.0:
+    liveins: $vgpr31
+
+    ; GCN-LABEL: name: func_frame_idx_at_the_end_of_bb
+    ; GCN: liveins: $vgpr31
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 1023, killed $vgpr31, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+    ; GCN-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr1, killed $vgpr0, implicit-def dead $vcc, implicit $exec
+    renamable $vgpr0 = V_AND_B32_e32 1023, killed $vgpr31, implicit $exec
+    renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+    renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
+...
+

diff  --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
index 450cae8a4a388..55cd662a0bd8e 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -42,14 +42,14 @@ define void @callee_with_stack_and_call() #0 {
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b32 s33, s32
 ; NO-SPILL-TO-VGPR-NEXT:    s_addk_i32 s32, 0x800
-; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[8:9], exec
+; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[10:11], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
-; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:16
-; NO-SPILL-TO-VGPR-NEXT:    v_writelane_b32 v1, s30, 0
-; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT:    v_writelane_b32 v2, s30, 0
+; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    s_waitcnt vmcnt(0)
-; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[8:9]
+; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[10:11]
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[8:9], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:16
@@ -65,15 +65,15 @@ define void @callee_with_stack_and_call() #0 {
 ; NO-SPILL-TO-VGPR-NEXT:    s_add_u32 s4, s4, external_void_func_void at rel32@lo+4
 ; NO-SPILL-TO-VGPR-NEXT:    s_addc_u32 s5, s5, external_void_func_void at rel32@hi+12
 ; NO-SPILL-TO-VGPR-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[4:5], exec
+; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[6:7], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
-; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:16
-; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; NO-SPILL-TO-VGPR-NEXT:    s_waitcnt vmcnt(0)
-; NO-SPILL-TO-VGPR-NEXT:    v_readlane_b32 s31, v1, 0
-; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT:    v_readlane_b32 s31, v2, 0
+; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    s_waitcnt vmcnt(0)
-; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[4:5]
+; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[6:7]
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[4:5], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:16

diff  --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
index f52783690ca80..df8e7a15a65f1 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
@@ -27,8 +27,8 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def vgpr10
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    s_add_i32 s34, s33, 0x100100
-; CHECK-NEXT:    buffer_store_dword v10, off, s[0:3], s34 ; 4-byte Folded Spill
+; CHECK-NEXT:    s_add_i32 s8, s33, 0x100100
+; CHECK-NEXT:    buffer_store_dword v10, off, s[0:3], s8 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b64 s[18:19], 8
 ; CHECK-NEXT:    s_mov_b32 s8, s16
 ; CHECK-NEXT:    s_mov_b32 s9, s17
@@ -56,8 +56,8 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
 ; CHECK-NEXT:    s_mov_b64 s[2:3], s[22:23]
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT:    s_add_i32 s6, s33, 0x100100
-; CHECK-NEXT:    buffer_load_dword v10, off, s[0:3], s6 ; 4-byte Folded Reload
+; CHECK-NEXT:    s_add_i32 s4, s33, 0x100100
+; CHECK-NEXT:    buffer_load_dword v10, off, s[0:3], s4 ; 4-byte Folded Reload
 ; CHECK-NEXT:    v_readlane_b32 s4, v40, 1
 ; CHECK-NEXT:    s_mov_b32 s5, 0
 ; CHECK-NEXT:    s_cmp_eq_u32 s4, s5
@@ -66,8 +66,8 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
 ; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], s33 offen ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB0_2
 ; CHECK-NEXT:  ; %bb.1: ; %store
-; CHECK-NEXT:    s_add_i32 s5, s33, 0x100000
-; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; CHECK-NEXT:    s_add_i32 s4, s33, 0x100000
+; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s4 ; 4-byte Folded Reload
 ; CHECK-NEXT:    ; implicit-def: $sgpr4
 ; CHECK-NEXT:    v_mov_b32_e32 v0, s4
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)

diff  --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 151cb2c598d81..a8e4999a0bb0e 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -235,30 +235,30 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:    v_or_b32_e32 v1, 0x12cc, v0
 ; MUBUF-NEXT:    v_or_b32_e32 v0, 0x12c8, v0
-; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
+; MUBUF-NEXT:    v_mov_b32_e32 v18, 0x4000
+; MUBUF-NEXT:    v_mov_b32_e32 v17, 0x4000
+; MUBUF-NEXT:    v_mov_b32_e32 v16, 0x4000
 ; MUBUF-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; MUBUF-NEXT:    v_mov_b32_e32 v15, 0x4000
 ; MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    v_mov_b32_e32 v12, 0
-; MUBUF-NEXT:    buffer_load_dword v8, v13, s[0:3], 0 offen glc
+; MUBUF-NEXT:    v_mov_b32_e32 v14, 0x4000
+; MUBUF-NEXT:    buffer_load_dword v8, v18, s[0:3], 0 offen glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
-; MUBUF-NEXT:    buffer_load_dword v9, v13, s[0:3], 0 offen offset:4 glc
+; MUBUF-NEXT:    buffer_load_dword v9, v17, s[0:3], 0 offen offset:4 glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
-; MUBUF-NEXT:    buffer_load_dword v2, v13, s[0:3], 0 offen offset:8 glc
+; MUBUF-NEXT:    buffer_load_dword v2, v16, s[0:3], 0 offen offset:8 glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
-; MUBUF-NEXT:    buffer_load_dword v3, v13, s[0:3], 0 offen offset:12 glc
+; MUBUF-NEXT:    buffer_load_dword v3, v15, s[0:3], 0 offen offset:12 glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
-; MUBUF-NEXT:    buffer_load_dword v10, v13, s[0:3], 0 offen offset:16 glc
+; MUBUF-NEXT:    buffer_load_dword v10, v14, s[0:3], 0 offen offset:16 glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:    v_mov_b32_e32 v13, 0x4000
 ; MUBUF-NEXT:    buffer_load_dword v11, v13, s[0:3], 0 offen offset:20 glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; MUBUF-NEXT:    v_mov_b32_e32 v12, 0
 ; MUBUF-NEXT:    v_add_co_u32_e32 v2, vcc, v0, v2
 ; MUBUF-NEXT:    v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
 ; MUBUF-NEXT:    v_add_co_u32_e32 v0, vcc, v7, v8
@@ -296,10 +296,10 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 offset:704 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s3, 0x2000
 ; FLATSCR-NEXT:    s_movk_i32 s2, 0x2000
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s2 offset:16 glc
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s3 offset:16 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    s_movk_i32 s2, 0x2000
 ; FLATSCR-NEXT:    scratch_load_dwordx4 v[4:7], off, s2 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0

diff  --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
index f0d0abd31ed52..aa4428f3da4eb 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
@@ -29,8 +29,8 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, $sgpr4, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT:   $sgpr5 = S_MOV_B32 524288
-  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 8192, addrspace 5)
+  ; CHECK-NEXT:   $sgpr4 = S_MOV_B32 524288
+  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 8192, addrspace 5)
   ; CHECK-NEXT:   S_BRANCH %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:

diff  --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
index e61e224d32522..faf4c16a64ffe 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
@@ -20,84 +20,109 @@ machineFunctionInfo:
   stackPtrOffsetReg:  $sgpr32
 
 body:             |
+  ; GFX8-LABEL: name: pei_scavenge_vgpr_spill
+  ; GFX8: bb.0:
+  ; GFX8-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX8-NEXT: {{  $}}
+  ; GFX8-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX8-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
+  ; GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX8-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
+  ; GFX8-NEXT:   $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+  ; GFX8-NEXT:   $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+  ; GFX8-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+  ; GFX8-NEXT:   $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+  ; GFX8-NEXT:   $vcc_lo = S_MOV_B32 8192
+  ; GFX8-NEXT:   $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr0, 0, implicit $exec
+  ; GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; GFX8-NEXT:   $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+  ; GFX8-NEXT:   $vcc_lo = S_MOV_B32 16384
+  ; GFX8-NEXT:   $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
+  ; GFX8-NEXT:   $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec
+  ; GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; GFX8-NEXT:   S_BRANCH %bb.1
+  ; GFX8-NEXT: {{  $}}
+  ; GFX8-NEXT: bb.1:
+  ; GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX8-NEXT: {{  $}}
+  ; GFX8-NEXT:   $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+  ; GFX8-NEXT:   $sgpr33 = V_READLANE_B32 $vgpr2, 0
+  ; GFX8-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX8-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
+  ; GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX8-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
+  ; GFX9-LABEL: name: pei_scavenge_vgpr_spill
+  ; GFX9: bb.0:
+  ; GFX9-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX9-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
+  ; GFX9-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; GFX9-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX9-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
+  ; GFX9-NEXT:   $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+  ; GFX9-NEXT:   $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+  ; GFX9-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+  ; GFX9-NEXT:   $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+  ; GFX9-NEXT:   $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
+  ; GFX9-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; GFX9-NEXT:   $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+  ; GFX9-NEXT:   $vgpr2 = V_ADD_U32_e32 16384, killed $vgpr2, implicit $exec
+  ; GFX9-NEXT:   $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec
+  ; GFX9-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; GFX9-NEXT:   S_BRANCH %bb.1
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT: bb.1:
+  ; GFX9-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+  ; GFX9-NEXT:   $sgpr33 = V_READLANE_B32 $vgpr2, 0
+  ; GFX9-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX9-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
+  ; GFX9-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; GFX9-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX9-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
+  ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
+  ; GFX9-FLATSCR: bb.0:
+  ; GFX9-FLATSCR-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX9-FLATSCR-NEXT: {{  $}}
+  ; GFX9-FLATSCR-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX9-FLATSCR-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
+  ; GFX9-FLATSCR-NEXT:   $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+  ; GFX9-FLATSCR-NEXT:   $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+  ; GFX9-FLATSCR-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+  ; GFX9-FLATSCR-NEXT:   $vcc_lo = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
+  ; GFX9-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec
+  ; GFX9-FLATSCR-NEXT:   $vcc_hi = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
+  ; GFX9-FLATSCR-NEXT:   $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec
+  ; GFX9-FLATSCR-NEXT:   S_BRANCH %bb.1
+  ; GFX9-FLATSCR-NEXT: {{  $}}
+  ; GFX9-FLATSCR-NEXT: bb.1:
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
+  ; GFX9-FLATSCR-NEXT: {{  $}}
+  ; GFX9-FLATSCR-NEXT:   $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+  ; GFX9-FLATSCR-NEXT:   $sgpr33 = V_READLANE_B32 $vgpr2, 0
+  ; GFX9-FLATSCR-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GFX9-FLATSCR-NEXT:   $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc
+  ; GFX9-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GFX9-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   bb.0:
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
 
-    ; GFX8-LABEL: name: pei_scavenge_vgpr_spill
-    ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
-    ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX8-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
-    ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
-    ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
-    ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
-    ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
-    ; GFX8-NEXT: $vcc_lo = S_MOV_B32 8192
-    ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr0, 0, implicit $exec
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX8-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
-    ; GFX8-NEXT: $vcc_lo = S_MOV_B32 16384
-    ; GFX8-NEXT: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec
-    ; GFX8-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
-    ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
-    ; GFX8-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
-    ; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
-    ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX8-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX8-NEXT: S_ENDPGM 0, amdgpu_allvgprs
-    ; GFX9-LABEL: name: pei_scavenge_vgpr_spill
-    ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
-    ; GFX9-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
-    ; GFX9-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
-    ; GFX9-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
-    ; GFX9-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
-    ; GFX9-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX9-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
-    ; GFX9-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec
-    ; GFX9-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
-    ; GFX9-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
-    ; GFX9-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
-    ; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc
-    ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX9-NEXT: S_ENDPGM 0, amdgpu_allvgprs
-    ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
-    ; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
-    ; GFX9-FLATSCR-NEXT: {{  $}}
-    ; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
-    ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
-    ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
-    ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec
-    ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
-    ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec
-    ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
-    ; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GFX9-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs
     $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
     $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_ENDPGM 0, amdgpu_allvgprs
 ...

diff  --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
index 8ae513783b875..cddf456777d51 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -72,7 +72,6 @@
 ; GFX10-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
 ; GFX10-FLATSCR-PAL: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
 ; GFX11-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
-; GCN-NOT: s_mov_b32 s0
 
 ; MUBUF-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[HI_OFF:v[0-9]+]],{{.*}} 0x280, [[CLAMP_IDX]]
 ; MUBUF-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[LO_OFF:v[0-9]+]],{{.*}} {{v2|0x80}}, [[CLAMP_IDX]]
@@ -120,7 +119,6 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
 
 ; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
-; GCN-NOT: s_mov_b32 s0
 
 ; FLATSCR-NOT: SCRATCH_RSRC_DWORD
 

diff  --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
index 6fca8187f6e21..2fca0021f5f99 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
@@ -235,6 +235,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -247,6 +248,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
     S_NOP 0
 
   bb.2:
@@ -286,7 +288,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -301,7 +303,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
-    liveins: $sgpr8
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
     S_NOP 0
 
   bb.2:
@@ -343,6 +345,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -355,6 +358,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
     S_NOP 0
 
   bb.2:
@@ -395,7 +399,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8_sgpr9
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -410,7 +414,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
-    liveins: $sgpr8_sgpr9
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
     S_NOP 0
 
   bb.2:
@@ -440,7 +444,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
+  ; VMEM-GFX8-NEXT:   $sgpr6_sgpr7 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0
@@ -448,22 +452,23 @@ body:             |
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr6_sgpr7
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1, implicit-def $vgpr1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = V_WRITELANE_B32 $sgpr9, 0, undef $vgpr1
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr1, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr9, 0, undef $vgpr0
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -477,6 +482,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
     S_NOP 0
 
   bb.2:
@@ -502,7 +508,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
+  ; VMEM-GFX8-NEXT:   $sgpr6_sgpr7 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
@@ -510,23 +516,23 @@ body:             |
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = V_READLANE_B32 killed $vgpr0, 0
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr6_sgpr7
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1, implicit-def $vgpr1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_MOV_B32_e32 16392, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $sgpr9 = V_READLANE_B32 killed $vgpr1, 0
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 16392, implicit $exec
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $sgpr9 = V_READLANE_B32 killed $vgpr0, 0
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8, $sgpr9
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -542,7 +548,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
-    liveins: $sgpr8, $sgpr9
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
     S_NOP 0
 
   bb.2:

diff  --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
index 076e891b16bbc..87ccf7a930ef9 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
@@ -1,35 +1,10 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=false -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s
 
 # Check that we allocate 2 emergency stack slots if we're spilling
 # SGPRs to memory and potentially have an offset larger than fits in
 # the addressing mode of the memory instructions.
 
-# CHECK-LABEL: name: test
-# CHECK: stack:
-# CHECK-NEXT: - { id: 0, name: '', type: spill-slot, offset: 8, size: 4, alignment: 4,
-# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-# CHECK-NEXT: - { id: 1, name: '', type: default, offset: 12, size: 4096, alignment: 4,
-# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-# CHECK-NEXT: - { id: 2, name: '', type: default, offset: 0, size: 4, alignment: 4,
-# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-# CHECK-NEXT: - { id: 3, name: '', type: default, offset: 4, size: 4, alignment: 4,
-# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-
-
-# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-# CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr1
-# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-# CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-
-
-# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-# CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-# CHECK-NEXT: $sgpr10 = V_READLANE_B32 killed $vgpr1, 0
-# CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
 ---
 name:            test
 tracksRegLiveness: true
@@ -47,6 +22,25 @@ machineFunctionInfo:
 body:             |
   bb.0:
     liveins: $sgpr30_sgpr31, $sgpr10, $sgpr11
+    ; CHECK-LABEL: name: test
+    ; CHECK: liveins: $sgpr30_sgpr31, $sgpr10, $sgpr11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
+    ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr2
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr2
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr2
+    ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $sgpr10 = V_READLANE_B32 killed $vgpr1, 0
+    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
+    ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $scc
     S_CMP_EQ_U32 0, 0, implicit-def $scc
     SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
     renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32

diff  --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
index c0f70c7ef2acb..8c0211ac25068 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
@@ -253,81 +253,81 @@ body:             |
     ; FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc
     ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF
     ; FLATSCR: INLINEASM &"", 0 /* attdialect */, implicit-def $vgpr0
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
     ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo
     ; FLATSCR: $exec_lo = S_MOV_B32 1
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; FLATSCR: $exec_lo = S_MOV_B32 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: INLINEASM &"", 1 /* sideeffect attdialect */, implicit $vgpr0
     ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12
     ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
     ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo
     ; FLATSCR: $exec_lo = S_MOV_B32 1
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12
     ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 3
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 3
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1
     ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 7
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
@@ -335,14 +335,14 @@ body:             |
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 15
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
@@ -351,14 +351,14 @@ body:             |
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 31
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
@@ -370,14 +370,14 @@ body:             |
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 255
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
@@ -397,14 +397,14 @@ body:             |
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 65535
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
     ; FLATSCR: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
@@ -440,9 +440,9 @@ body:             |
     ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; FLATSCR: $sgpr64_sgpr65 = S_MOV_B64 $exec
     ; FLATSCR: $exec = S_MOV_B64 4294967295
-    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
+    ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 -1
-    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
+    ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
     ; FLATSCR: $exec = S_MOV_B64 killed $sgpr64_sgpr65
     ; GCN64-MUBUF-LABEL: name: check_spill
     ; GCN64-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11
@@ -455,13 +455,13 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr28 = S_ADD_U32 $sgpr28, $sgpr11, implicit-def $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
     ; GCN64-MUBUF-NEXT: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
@@ -471,33 +471,33 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr3
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr3
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
@@ -510,32 +510,32 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr2
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
@@ -560,44 +560,44 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
@@ -618,13 +618,13 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GCN32-MUBUF-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr1
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
@@ -634,33 +634,33 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr2
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr2
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr3
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr3
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr1
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0
@@ -673,32 +673,32 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr2
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr2
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr1
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0
@@ -723,44 +723,44 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
@@ -777,51 +777,51 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
     ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr3
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr3
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
@@ -830,36 +830,36 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr2
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
@@ -880,55 +880,55 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, align 4096, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, align 4096, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     renamable $sgpr12 = IMPLICIT_DEF
     SI_SPILL_S32_SAVE killed $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
 
@@ -1011,30 +1011,30 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr31 = S_MOV_B32 14680064, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
     ; GCN64-MUBUF-NEXT: $sgpr28 = S_ADD_U32 $sgpr28, $sgpr11, implicit-def $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
     ; GCN64-MUBUF-NEXT: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
+    ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2
+    ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr3
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1
+    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2
+    ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr3
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1045,31 +1045,31 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1
+    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2
+    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3
+    ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
+    ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr2
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1
+    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2
+    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3
+    ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4
+    ; GCN64-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5
+    ; GCN64-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6
+    ; GCN64-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7
+    ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1092,44 +1092,44 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
-    ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7
-    ; GCN64-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8
-    ; GCN64-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9
-    ; GCN64-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10
-    ; GCN64-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11
-    ; GCN64-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12
-    ; GCN64-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13
-    ; GCN64-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14
-    ; GCN64-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15
-    ; GCN64-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16
-    ; GCN64-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17
-    ; GCN64-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18
-    ; GCN64-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19
-    ; GCN64-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20
-    ; GCN64-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21
-    ; GCN64-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22
-    ; GCN64-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23
-    ; GCN64-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24
-    ; GCN64-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25
-    ; GCN64-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26
-    ; GCN64-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27
-    ; GCN64-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28
-    ; GCN64-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29
-    ; GCN64-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30
-    ; GCN64-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1
+    ; GCN64-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2
+    ; GCN64-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3
+    ; GCN64-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4
+    ; GCN64-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5
+    ; GCN64-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6
+    ; GCN64-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7
+    ; GCN64-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8
+    ; GCN64-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9
+    ; GCN64-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10
+    ; GCN64-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11
+    ; GCN64-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12
+    ; GCN64-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13
+    ; GCN64-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14
+    ; GCN64-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15
+    ; GCN64-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16
+    ; GCN64-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17
+    ; GCN64-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18
+    ; GCN64-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19
+    ; GCN64-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20
+    ; GCN64-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21
+    ; GCN64-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22
+    ; GCN64-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23
+    ; GCN64-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24
+    ; GCN64-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25
+    ; GCN64-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26
+    ; GCN64-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27
+    ; GCN64-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28
+    ; GCN64-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29
+    ; GCN64-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30
+    ; GCN64-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31
+    ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1148,30 +1148,30 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr99 = S_MOV_B32 834756608, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GCN32-MUBUF-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GCN32-MUBUF-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1
+    ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr2
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr2
+    ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr3
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1
+    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2
+    ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr3
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1182,31 +1182,31 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN32-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN32-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN32-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1
+    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2
+    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3
+    ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1
+    ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr2
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1
+    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2
+    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3
+    ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4
+    ; GCN32-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5
+    ; GCN32-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6
+    ; GCN32-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7
+    ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr2
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1229,44 +1229,44 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
-    ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4
-    ; GCN32-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5
-    ; GCN32-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6
-    ; GCN32-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7
-    ; GCN32-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8
-    ; GCN32-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9
-    ; GCN32-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10
-    ; GCN32-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11
-    ; GCN32-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12
-    ; GCN32-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13
-    ; GCN32-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14
-    ; GCN32-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15
-    ; GCN32-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16
-    ; GCN32-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17
-    ; GCN32-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18
-    ; GCN32-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19
-    ; GCN32-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20
-    ; GCN32-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21
-    ; GCN32-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22
-    ; GCN32-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23
-    ; GCN32-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24
-    ; GCN32-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25
-    ; GCN32-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26
-    ; GCN32-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27
-    ; GCN32-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28
-    ; GCN32-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29
-    ; GCN32-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30
-    ; GCN32-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr1
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1
+    ; GCN32-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2
+    ; GCN32-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3
+    ; GCN32-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4
+    ; GCN32-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5
+    ; GCN32-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6
+    ; GCN32-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7
+    ; GCN32-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8
+    ; GCN32-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9
+    ; GCN32-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10
+    ; GCN32-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11
+    ; GCN32-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12
+    ; GCN32-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13
+    ; GCN32-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14
+    ; GCN32-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15
+    ; GCN32-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16
+    ; GCN32-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17
+    ; GCN32-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18
+    ; GCN32-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19
+    ; GCN32-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20
+    ; GCN32-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21
+    ; GCN32-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22
+    ; GCN32-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23
+    ; GCN32-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24
+    ; GCN32-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25
+    ; GCN32-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26
+    ; GCN32-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27
+    ; GCN32-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28
+    ; GCN32-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29
+    ; GCN32-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30
+    ; GCN32-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31
+    ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -1281,31 +1281,31 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr33 = S_MOV_B32 0
     ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
     ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2
+    ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr3
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2
+    ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr3
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
@@ -1314,33 +1314,33 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
     ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-FLATSCR-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-FLATSCR-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-FLATSCR-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3
+    ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr2
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3
+    ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4
+    ; GCN64-FLATSCR-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5
+    ; GCN64-FLATSCR-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6
+    ; GCN64-FLATSCR-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7
+    ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.6, addrspace 5)
@@ -1361,53 +1361,53 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr26 = V_READLANE_B32 $vgpr0, 14
     ; GCN64-FLATSCR-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-FLATSCR-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-FLATSCR-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-FLATSCR-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7
-    ; GCN64-FLATSCR-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8
-    ; GCN64-FLATSCR-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9
-    ; GCN64-FLATSCR-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10
-    ; GCN64-FLATSCR-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11
-    ; GCN64-FLATSCR-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12
-    ; GCN64-FLATSCR-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13
-    ; GCN64-FLATSCR-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14
-    ; GCN64-FLATSCR-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15
-    ; GCN64-FLATSCR-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16
-    ; GCN64-FLATSCR-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17
-    ; GCN64-FLATSCR-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18
-    ; GCN64-FLATSCR-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19
-    ; GCN64-FLATSCR-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20
-    ; GCN64-FLATSCR-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21
-    ; GCN64-FLATSCR-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22
-    ; GCN64-FLATSCR-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23
-    ; GCN64-FLATSCR-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24
-    ; GCN64-FLATSCR-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25
-    ; GCN64-FLATSCR-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26
-    ; GCN64-FLATSCR-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27
-    ; GCN64-FLATSCR-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28
-    ; GCN64-FLATSCR-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29
-    ; GCN64-FLATSCR-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30
-    ; GCN64-FLATSCR-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
-    ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3
+    ; GCN64-FLATSCR-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4
+    ; GCN64-FLATSCR-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5
+    ; GCN64-FLATSCR-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6
+    ; GCN64-FLATSCR-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7
+    ; GCN64-FLATSCR-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8
+    ; GCN64-FLATSCR-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9
+    ; GCN64-FLATSCR-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10
+    ; GCN64-FLATSCR-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11
+    ; GCN64-FLATSCR-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12
+    ; GCN64-FLATSCR-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13
+    ; GCN64-FLATSCR-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14
+    ; GCN64-FLATSCR-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15
+    ; GCN64-FLATSCR-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16
+    ; GCN64-FLATSCR-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17
+    ; GCN64-FLATSCR-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18
+    ; GCN64-FLATSCR-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19
+    ; GCN64-FLATSCR-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20
+    ; GCN64-FLATSCR-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21
+    ; GCN64-FLATSCR-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22
+    ; GCN64-FLATSCR-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23
+    ; GCN64-FLATSCR-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24
+    ; GCN64-FLATSCR-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25
+    ; GCN64-FLATSCR-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26
+    ; GCN64-FLATSCR-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27
+    ; GCN64-FLATSCR-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28
+    ; GCN64-FLATSCR-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29
+    ; GCN64-FLATSCR-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30
+    ; GCN64-FLATSCR-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31
+    ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
+    ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, align 4096, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, align 4096, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0
+    ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
 
     renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32

diff  --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
index 515253e6a43f2..2b7635fede0bc 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
@@ -1,22 +1,275 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx803 -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 define void @child_function() #0 {
+; GCN-LABEL: child_function:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "", "~{vcc}" () #0
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_sgpr_with_no_lower_vgpr_available:
-; GCN:  buffer_store_dword v255, off, s[0:3], s32
-; GCN:  v_writelane_b32 v255, s33, 2
-; GCN:  v_writelane_b32 v255, s30, 0
-; GCN:  v_writelane_b32 v255, s31, 1
-; GCN:  s_swappc_b64 s[30:31], s[4:5]
-; GCN:  v_readlane_b32 s31, v255, 1
-; GCN:  v_readlane_b32 s30, v255, 0
-; GCN:  v_readlane_b32 s33, v255, 2
-; GCN: ; NumVgprs: 256
-
 define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
+; GCN-LABEL: spill_sgpr_with_no_lower_vgpr_available:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_store_dword v255, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    v_writelane_b32 v255, s33, 2
+; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    s_add_i32 s32, s32, 0x7400
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT:    v_writelane_b32 v255, s30, 0
+; GCN-NEXT:    v_writelane_b32 v255, s31, 1
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:444
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, child_function at gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, child_function at gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    s_mov_b64 s[10:11], s[2:3]
+; GCN-NEXT:    s_mov_b64 s[8:9], s[0:1]
+; GCN-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; GCN-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    v_readlane_b32 s31, v255, 1
+; GCN-NEXT:    v_readlane_b32 s30, v255, 0
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload
+; GCN-NEXT:    s_add_i32 s32, s32, 0xffff8c00
+; GCN-NEXT:    v_readlane_b32 s33, v255, 2
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_load_dword v255, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
 
@@ -51,17 +304,262 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_to_lowest_available_vgpr:
-; GCN:  buffer_store_dword v254, off, s[0:3], s32
-; GCN:  v_writelane_b32 v254, s33, 2
-; GCN:  v_writelane_b32 v254, s30, 0
-; GCN:  v_writelane_b32 v254, s31, 1
-; GCN:  s_swappc_b64 s[30:31], s[4:5]
-; GCN:  v_readlane_b32 s31, v254, 1
-; GCN:  v_readlane_b32 s30, v254, 0
-; GCN:  v_readlane_b32 s33, v254, 2
-
 define void @spill_to_lowest_available_vgpr() #0 {
+; GCN-LABEL: spill_to_lowest_available_vgpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    v_writelane_b32 v254, s33, 2
+; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    s_add_i32 s32, s32, 0x7400
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT:    v_writelane_b32 v254, s30, 0
+; GCN-NEXT:    v_writelane_b32 v254, s31, 1
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:440
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, child_function at gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, child_function at gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    s_mov_b64 s[10:11], s[2:3]
+; GCN-NEXT:    s_mov_b64 s[8:9], s[0:1]
+; GCN-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; GCN-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    v_readlane_b32 s31, v254, 1
+; GCN-NEXT:    v_readlane_b32 s30, v254, 0
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    s_add_i32 s32, s32, 0xffff8c00
+; GCN-NEXT:    v_readlane_b32 s33, v254, 2
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
 
@@ -96,14 +594,254 @@ define void @spill_to_lowest_available_vgpr() #0 {
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_sgpr_with_sgpr_uses:
-; GCN-NOT:  buffer_store_dword v255, off, s[0:3], s32
-; GCN: ; def s4
-; GCN: v_writelane_b32 v254, s4, 0
-; GCN: v_readlane_b32 s4, v254, 0
-; GCN: ; use s4
-
 define void @spill_sgpr_with_sgpr_uses() #0 {
+; GCN-LABEL: spill_sgpr_with_sgpr_uses:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:440
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s4
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v254, s4, 0
+; GCN-NEXT:    s_cbranch_scc1 .LBB3_2
+; GCN-NEXT:  ; %bb.1: ; %bb0
+; GCN-NEXT:    v_readlane_b32 s4, v254, 0
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use s4
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:  .LBB3_2: ; %ret
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
 
@@ -147,12 +885,243 @@ ret:
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_sgpr_with_tail_call
-; GCN-NOT:  buffer_store_dword v255, off, s[0:3], s32
-; GCN-NOT:  v_writelane
-; GCN:  s_setpc_b64 s[4:5]
-
 define void @spill_sgpr_with_tail_call() #0 {
+; GCN-LABEL: spill_sgpr_with_tail_call:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:444
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, child_function at gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, child_function at gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[4:5]
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
 
@@ -187,29 +1156,281 @@ define void @spill_sgpr_with_tail_call() #0 {
   ret void
 }
 
-; Special case where all registers are explicitly clobbered in the function and
-; we have no VGPR to allocate for SGPR spills. We are forced to spill to memory.
-
-; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr:
-; GCN: v_writelane_b32 v{{[0-9]+}}, s34, 0
-; GCN: v_writelane_b32 v{{[0-9]+}}, s35, 1
-; GCN: v_writelane_b32 v{{[0-9]+}}, s36, 2
-; GCN: v_writelane_b32 v{{[0-9]+}}, s37, 3
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
-; GCN: #ASMEND
-; GCN: buffer_load_dword v{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}
-; GCN: buffer_load_dword v{{[0-9]+}}
-; GCN: v_readlane_b32 s37, v{{[0-9]+}}, 3
-; GCN: v_readlane_b32 s36, v{{[0-9]+}}, 2
-; GCN: v_readlane_b32 s35, v{{[0-9]+}}, 1
-; GCN: v_readlane_b32 s34, v{{[0-9]+}}, 0
-
 define void @spill_sgpr_no_free_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+; GCN-LABEL: spill_sgpr_no_free_vgpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v255, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT:    v_writelane_b32 v4, s34, 0
+; GCN-NEXT:    v_writelane_b32 v4, s35, 1
+; GCN-NEXT:    v_writelane_b32 v4, s36, 2
+; GCN-NEXT:    v_writelane_b32 v4, s37, 3
+; GCN-NEXT:    v_mov_b32_e32 v5, v3
+; GCN-NEXT:    v_mov_b32_e32 v3, v1
+; GCN-NEXT:    ; implicit-def: $sgpr4
+; GCN-NEXT:    ; implicit-def: $sgpr4
+; GCN-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $exec
+; GCN-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GCN-NEXT:    v_mov_b32_e32 v1, v3
+; GCN-NEXT:    ; implicit-def: $sgpr4
+; GCN-NEXT:    ; implicit-def: $sgpr4
+; GCN-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $exec
+; GCN-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GCN-NEXT:    v_mov_b32_e32 v3, v5
+; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
+; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
+; GCN-NEXT:    flat_load_dwordx4 v[5:8], v[2:3]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    flat_store_dwordx4 v[0:1], v[5:8]
+; GCN-NEXT:    v_readlane_b32 s37, v4, 3
+; GCN-NEXT:    v_readlane_b32 s36, v4, 2
+; GCN-NEXT:    v_readlane_b32 s35, v4, 1
+; GCN-NEXT:    v_readlane_b32 s34, v4, 0
+; GCN-NEXT:    buffer_load_dword v255, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
   call void asm sideeffect "",
   "~{v6},~{v7},~{v8},~{v9}
@@ -246,11 +1467,13 @@ define void @spill_sgpr_no_free_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> add
   ret void
 }
 
-; If IPRA no-CSR optimization is enabled, we will not be able to allocate an
-; SGPR for VGPR spills in the parent function since this child function uses all
-; VGPRs.
-
 define internal void @child_function_ipra() #0 {
+; GCN-LABEL: child_function_ipra:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "",
   "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
   ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
@@ -281,22 +1504,519 @@ define internal void @child_function_ipra() #0 {
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr_ipra:
-; GCN: v_writelane_b32 v0, s30, 0
-; GCN: buffer_store_dword v0, off
-; GCN: v_writelane_b32 v0, s31, 0
-; GCN: buffer_store_dword v0, off
-; GCN: swappc
-; GCN: buffer_load_dword v0, off
-; GCN: v_readlane_b32 s31, v0, 0
-; GCN: buffer_load_dword v0, off
-; GCN: v_readlane_b32 s30, v0, 0
 define void @spill_sgpr_no_free_vgpr_ipra() #0 {
+; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s6, s33
+; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    s_add_i32 s32, s32, 0x7400
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 s[14:15], exec
+; GCN-NEXT:    s_mov_b64 exec, 1
+; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:456
+; GCN-NEXT:    v_writelane_b32 v1, s30, 0
+; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:456
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_mov_b64 exec, s[14:15]
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_mov_b64 exec, 1
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:456
+; GCN-NEXT:    v_writelane_b32 v0, s31, 0
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:456
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_mov_b64 exec, s[12:13]
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, child_function_ipra at rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, child_function_ipra at rel32@hi+12
+; GCN-NEXT:    s_mov_b64 s[10:11], s[2:3]
+; GCN-NEXT:    s_mov_b64 s[8:9], s[0:1]
+; GCN-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; GCN-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    s_mov_b64 s[8:9], exec
+; GCN-NEXT:    s_mov_b64 exec, 1
+; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:456
+; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_readlane_b32 s31, v1, 0
+; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:456
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_mov_b64 exec, s[8:9]
+; GCN-NEXT:    s_mov_b64 s[4:5], exec
+; GCN-NEXT:    s_mov_b64 exec, 1
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:456
+; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_readlane_b32 s30, v0, 0
+; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:456
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    s_add_i32 s32, s32, 0xffff8c00
+; GCN-NEXT:    s_mov_b32 s33, s6
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   call void @child_function_ipra()
   ret void
 }
 
 define internal void @child_function_ipra_tail_call() #0 {
+; GCN-LABEL: child_function_ipra_tail_call:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v255, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    buffer_load_dword v255, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "",
   "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
   ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
@@ -327,14 +2047,14 @@ define internal void @child_function_ipra_tail_call() #0 {
   ret void
 }
 
-; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr_ipra_tail_call:
-; GCN-NOT: v_writelane_b32
-; GCN-NOT: buffer_store_dword
-; GCN-NOT: swappc
-; GCN-NOT: buffer_load_dword v0, off
-; GCN-NOT: v_readlane_b32
-; GCN: setpc
 define void @spill_sgpr_no_free_vgpr_ipra_tail_call() #0 {
+; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra_tail_call:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_getpc_b64 s[4:5]
+; GCN-NEXT:    s_add_u32 s4, s4, child_function_ipra_tail_call at rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s5, s5, child_function_ipra_tail_call at rel32@hi+12
+; GCN-NEXT:    s_setpc_b64 s[4:5]
   tail call void @child_function_ipra_tail_call()
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
index 8077a0b6adfbd..34bc7523051ff 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
@@ -1,12 +1,45 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck -check-prefixes=GCN,MUBUF %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck -check-prefix=MUBUF %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefix=FLATSCR %s
 
 ; Test that the VGPR spiller correctly switches to SGPR offsets when the
 ; instruction offset field would overflow, and that it accounts for memory
 ; swizzling.
 
-; GCN-LABEL: test_inst_offset_kernel
 define amdgpu_kernel void @test_inst_offset_kernel() {
+; MUBUF-LABEL: test_inst_offset_kernel:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_add_u32 s0, s0, s7
+; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4092 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4092 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_endpgm
+;
+; FLATSCR-LABEL: test_inst_offset_kernel:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_lo offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xffc
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xffc
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dword off, v0, vcc_hi offset:8
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_endpgm
 entry:
   ; Occupy 4092 bytes of scratch, so the offset of the spill of %a just fits in
   ; the instruction offset field.
@@ -14,8 +47,8 @@ entry:
   %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
 
   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4092 ; 4-byte Folded Spill
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s{{[0-9]+}} ; 4-byte Folded Spill
+
+
   %a = load volatile i32, i32 addrspace(5)* %aptr
 
   ; Force %a to spill.
@@ -27,8 +60,42 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_sgpr_offset_kernel
 define amdgpu_kernel void @test_sgpr_offset_kernel() {
+; MUBUF-LABEL: test_sgpr_offset_kernel:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_add_u32 s0, s0, s7
+; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, 0x40000
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_mov_b32 s4, 0x40000
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_endpgm
+;
+; FLATSCR-LABEL: test_sgpr_offset_kernel:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_lo offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0x1000
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_movk_i32 s0, 0x1000
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dword off, v0, vcc_hi offset:8
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_endpgm
 entry:
   ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
   ; fit in the instruction, and has to live in the SGPR offset.
@@ -37,12 +104,7 @@ entry:
 
   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
   ; 0x40000 / 64 = 4096 (for wave64)
-  ; MUBUF:   s_mov_b32 s4, 0x40000
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
-  ; FLATSCR: s_movk_i32 s2, 0x1000
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s2 ; 4-byte Folded Spill
   %a = load volatile i32, i32 addrspace(5)* %aptr
-
   ; Force %a to spill
   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
 
@@ -52,11 +114,50 @@ entry:
   ret void
 }
 
-; FIXME: If we fail to scavenge an SGPR in a kernel we don't have a stack
-; pointer to temporarily update, so we just crash.
-
-; GCN-LABEL: test_sgpr_offset_function_scavenge_fail_func
 define void @test_sgpr_offset_function_scavenge_fail_func() #2 {
+; MUBUF-LABEL: test_sgpr_offset_function_scavenge_fail_func:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x1004
+; MUBUF-NEXT:    buffer_store_dword v0, v1, s[0:3], s32 offen ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x1004
+; MUBUF-NEXT:    buffer_load_dword v0, v1, s[0:3], s32 offen ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_sgpr_offset_function_scavenge_fail_func:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_add_i32 s8, s32, 0x1004
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s8 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_add_i32 s8, s32, 0x1004
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s8 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
 entry:
   ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
   ; fit in the instruction, and has to live in the SGPR offset.
@@ -77,12 +178,6 @@ entry:
 
   ; 0x40000 / 64 = 4096 (for wave64)
   %a = load volatile i32, i32 addrspace(5)* %aptr
-
-  ; MUBUF:   v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004
-  ; MUBUF-NEXT: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], s32 offen ; 4-byte Folded Spill
-
-; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1004
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
   call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a)
 
   %asm = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=s,=s,=s,=s,=s,=s,=s,=s"()
@@ -96,18 +191,58 @@ entry:
   %asm7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm, 7
 
   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
-
-  ; MUBUF:   v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004
-  ; MUBUF-NEXT: buffer_load_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], s32 offen ; 4-byte Folded Reload
-  ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1004
-  ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload
-
    ; Force %a to spill with no free SGPRs
   call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0, i32 %asm1, i32 %asm2, i32 %asm3, i32 %asm4, i32 %asm5, i32 %asm6, i32 %asm7, i32 %a)
   ret void
 }
 
 define amdgpu_kernel void @test_sgpr_offset_function_scavenge_fail_kernel() #3 {
+; MUBUF-LABEL: test_sgpr_offset_function_scavenge_fail_kernel:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_add_u32 s0, s0, s7
+; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x1004
+; MUBUF-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x1004
+; MUBUF-NEXT:    buffer_load_dword v0, v1, s[0:3], 0 offen ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_endpgm
+;
+; FLATSCR-LABEL: test_sgpr_offset_function_scavenge_fail_kernel:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_hi offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s8, 0x1004
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s8 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_movk_i32 s8, 0x1004
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s8 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
 entry:
   ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
   ; fit in the instruction, and has to live in the SGPR offset.
@@ -128,12 +263,6 @@ entry:
 
   ; 0x40000 / 64 = 4096 (for wave64)
   %a = load volatile i32, i32 addrspace(5)* %aptr
-
-  ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004
-  ; MUBUF: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], 0 offen ; 4-byte Folded Spill
-
-  ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0x1004
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
   call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a)
 
   %asm = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=s,=s,=s,=s,=s,=s,=s,=s"()
@@ -147,19 +276,57 @@ entry:
   %asm7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm, 7
 
   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
-
-  ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004
-  ; MUBUF: buffer_load_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], 0 offen ; 4-byte Folded Reload
-  ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0x1004
-  ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload
-
    ; Force %a to spill with no free SGPRs
   call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0, i32 %asm1, i32 %asm2, i32 %asm3, i32 %asm4, i32 %asm5, i32 %asm6, i32 %asm7, i32 %a)
   ret void
 }
 
-; GCN-LABEL: test_sgpr_offset_subregs_kernel
 define amdgpu_kernel void @test_sgpr_offset_subregs_kernel() {
+; MUBUF-LABEL: test_sgpr_offset_subregs_kernel:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_add_u32 s0, s0, s7
+; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:12 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:16 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4088 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:4092 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4088 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_nop 0
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:4092 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; v[0:1]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_endpgm
+;
+; FLATSCR-LABEL: test_sgpr_offset_subregs_kernel:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, vcc_lo offset:12 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xff8
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_hi offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xff8
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; v[0:1]
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
 entry:
   ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a
   ; still fits below offset 4096 (4088 + 8 - 4 = 4092), and can be placed in
@@ -167,11 +334,6 @@ entry:
   %alloca = alloca i8, i32 4084, align 4, addrspace(5)
   %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
-
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4088 ; 4-byte Folded Spill
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4092 ; 4-byte Folded Spill
-  ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0xff8
-  ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], [[SOFF]]          ; 8-byte Folded Spill
   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
 
@@ -188,8 +350,54 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_inst_offset_subregs_kernel
 define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
+; MUBUF-LABEL: test_inst_offset_subregs_kernel:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_add_u32 s0, s0, s7
+; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:12 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:16 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, 0x3ff00
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_mov_b32 s4, 0x3ff00
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_nop 0
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; v[0:1]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_endpgm
+;
+; FLATSCR-LABEL: test_inst_offset_subregs_kernel:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, vcc_lo offset:12 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xffc
+; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_hi offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_movk_i32 s0, 0xffc
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; v[0:1]
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_endpgm
 entry:
   ; Occupy 4092 bytes of scratch, so that the spill of the last subreg of %a
   ; does not fit below offset 4096 (4092 + 8 - 4 = 4096), and has to live
@@ -199,11 +407,6 @@ entry:
   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
 
   ; 0x3ff00 / 64 = 4092 (for wave64)
-  ; MUBUF:   s_mov_b32 s4, 0x3ff00
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill
-  ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0xffc
-  ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], [[SOFF]]          ; 8-byte Folded Spill
   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
 
@@ -220,8 +423,34 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_inst_offset_function
 define void @test_inst_offset_function() {
+; MUBUF-LABEL: test_inst_offset_function:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4088 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4088 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_inst_offset_function:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:4088 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:4088 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:4
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
 entry:
   ; Occupy enough bytes of scratch, so the offset of the spill of %a
   ; just fits in the instruction offset field when the emergency stack
@@ -231,8 +460,8 @@ entry:
   %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
 
   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
+
+
   %a = load volatile i32, i32 addrspace(5)* %aptr
 
   ; Force %a to spill.
@@ -244,8 +473,38 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_sgpr_offset_function
 define void @test_sgpr_offset_function() {
+; MUBUF-LABEL: test_sgpr_offset_function:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_add_i32 s4, s32, 0x40100
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_add_i32 s4, s32, 0x40100
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_sgpr_offset_function:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_add_i32 s0, s32, 0x1004
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_add_i32 s0, s32, 0x1004
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:8
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
 entry:
   ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
   ; fit in the instruction, and has to live in the SGPR offset.
@@ -254,10 +513,6 @@ entry:
 
   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
   ; 0x40000 / 64 = 4096 (for wave64)
-  ; MUBUF:   s_add_i32 s4, s32, 0x40100
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
-  ; FLATSCR: s_add_i32 s0, s32, 0x1004
-  ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s0 ; 4-byte Folded Spill
   %a = load volatile i32, i32 addrspace(5)* %aptr
 
   ; Force %a to spill
@@ -269,24 +524,57 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_sgpr_offset_subregs_function
 define void @test_sgpr_offset_subregs_function() {
+; MUBUF-LABEL: test_sgpr_offset_subregs_function:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4084 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4088 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4084 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_nop 0
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4088 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; v[0:1]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_sgpr_offset_subregs_function:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s32 offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s32 offset:4084 ; 8-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:4 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s32 offset:4084 ; 8-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; v[0:1]
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
 entry:
   ; We want to test the spill of the last subreg of %a is the highest
   ; valid value for the immediate offset. We enable the emergency
   ; stack slot for large frames, so it's hard to get the frame layout
   ; exactly as we want to test it.
-  ;
   ; Occupy 4084 bytes of scratch, so that the spill of the last subreg of %a
   ; still fits below offset 4096 (4084 + 8 - 4 = 4092), and can be placed in
   ; the instruction offset field.
   %alloca = alloca i8, i32 4084, align 4, addrspace(5)
   %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
-
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4084 ; 4-byte Folded Spill
-  ; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
-  ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4084 ; 8-byte Folded Spill
   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
 
@@ -303,8 +591,48 @@ entry:
   ret void
 }
 
-; GCN-LABEL: test_inst_offset_subregs_function
 define void @test_inst_offset_subregs_function() {
+; MUBUF-LABEL: test_inst_offset_subregs_function:
+; MUBUF:       ; %bb.0: ; %entry
+; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_add_i32 s4, s32, 0x3ff00
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Spill
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 glc
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    s_add_i32 s4, s32, 0x3ff00
+; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_nop 0
+; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload
+; MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; MUBUF-NEXT:    ;;#ASMSTART
+; MUBUF-NEXT:    ; v[0:1]
+; MUBUF-NEXT:    ;;#ASMEND
+; MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; FLATSCR-LABEL: test_inst_offset_subregs_function:
+; FLATSCR:       ; %bb.0: ; %entry
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s32 offset:12 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s32 offset:4092 ; 8-byte Folded Spill
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s32 offset:8 glc
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s32 offset:4092 ; 8-byte Folded Reload
+; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    ;;#ASMSTART
+; FLATSCR-NEXT:    ; v[0:1]
+; FLATSCR-NEXT:    ;;#ASMEND
+; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
 entry:
   ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a
   ; does not fit below offset 4096 (408 + 4 + 8 - 4 = 4096), and has to live
@@ -314,10 +642,6 @@ entry:
   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
 
   ; 0x3ff0000 / 64 = 4092 (for wave64)
-  ; MUBUF: s_add_i32 s4, s32, 0x3ff00
-  ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
-  ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill
-  ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4092 ; 8-byte Folded Spill
   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
 

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index e0fc1e19b1677..bdeb97cede4c3 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=verde -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck -check-prefixes=CHECK,GFX6 %s
 ; RUN: llc -sgpr-regalloc=basic -vgpr-regalloc=basic -march=amdgcn -mcpu=tonga -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck --check-prefix=CHECK %s
 ; RUN: llc -march=amdgcn -mattr=-xnack,+enable-flat-scratch -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck -check-prefixes=CHECK,GFX9-FLATSCR,FLATSCR %s
@@ -12,17 +13,10044 @@
 ; mechanism works even when many spills happen.
 
 ; Just test that it compiles successfully.
-; CHECK-LABEL: test
-
-; GFX9-FLATSCR: s_mov_b32 [[SOFF1:s[0-9]+]], 4{{$}}
-; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill
-; GFX9-FLATSCR: ;;#ASMSTART
-; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x1{{[0-9a-f]+}}{{$}}
-; GFX9-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload
-
-; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:{{[0-9]+}} ; 16-byte Folded Spill
-; GFX10-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, off offset:{{[0-9]+}} ; 16-byte Folded Reload
 define amdgpu_kernel void @test(<1280 x i32> addrspace(1)* %out, <1280 x i32> addrspace(1)* %in) {
+; GFX6-LABEL: test:
+; GFX6:       ; %bb.0: ; %entry
+; GFX6-NEXT:    s_mov_b32 s44, SCRATCH_RSRC_DWORD0
+; GFX6-NEXT:    s_mov_b32 s45, SCRATCH_RSRC_DWORD1
+; GFX6-NEXT:    s_mov_b32 s46, -1
+; GFX6-NEXT:    s_mov_b32 s47, 0xe8f000
+; GFX6-NEXT:    s_add_u32 s44, s44, s3
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX6-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
+; GFX6-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, -1, v0
+; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 13, v0
+; GFX6-NEXT:    s_mov_b32 s18, 0
+; GFX6-NEXT:    s_mov_b32 s19, 0xf000
+; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s2, v5
+; GFX6-NEXT:    v_mov_b32_e32 v1, s3
+; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX6-NEXT:    s_movk_i32 s4, 0x80
+; GFX6-NEXT:    s_mov_b32 s5, s18
+; GFX6-NEXT:    s_mov_b64 s[6:7], s[18:19]
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:3968
+; GFX6-NEXT:    s_addc_u32 s45, s45, 0
+; GFX6-NEXT:    s_movk_i32 s8, 0x100
+; GFX6-NEXT:    s_mov_b32 s9, s18
+; GFX6-NEXT:    s_mov_b64 s[10:11], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s12, 0x180
+; GFX6-NEXT:    s_mov_b32 s13, s18
+; GFX6-NEXT:    s_mov_b64 s[14:15], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s20, 0x200
+; GFX6-NEXT:    s_mov_b32 s21, s18
+; GFX6-NEXT:    s_mov_b64 s[22:23], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s24, 0x280
+; GFX6-NEXT:    s_mov_b32 s25, s18
+; GFX6-NEXT:    s_mov_b64 s[26:27], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s28, 0x300
+; GFX6-NEXT:    s_mov_b32 s29, s18
+; GFX6-NEXT:    s_mov_b64 s[30:31], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s36, 0x380
+; GFX6-NEXT:    s_mov_b32 s37, s18
+; GFX6-NEXT:    s_mov_b64 s[38:39], s[18:19]
+; GFX6-NEXT:    s_movk_i32 s40, 0x400
+; GFX6-NEXT:    s_mov_b32 s41, s18
+; GFX6-NEXT:    s_mov_b64 s[42:43], s[18:19]
+; GFX6-NEXT:    s_mov_b64 s[16:17], s[2:3]
+; GFX6-NEXT:    v_mov_b32_e32 v6, 0
+; GFX6-NEXT:    s_mov_b32 s2, 0x3fd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1268 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1272 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1276 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1280 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1300 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1304 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1308 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1312 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1332 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1336 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1340 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1344 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1364 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1368 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1372 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1376 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1396 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1400 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1404 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1408 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1428 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1432 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1436 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1440 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1460 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1464 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1468 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1472 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1492 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1496 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1500 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1504 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1556 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1560 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1564 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1568 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1588 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1592 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1596 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1600 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1620 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1624 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1628 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1632 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1652 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1656 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1660 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1664 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1684 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1688 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1692 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1696 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1716 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1720 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1724 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1728 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1748 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1752 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1756 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1760 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1780 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1784 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1788 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1792 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1860 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1864 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1868 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1872 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1892 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1896 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1900 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1904 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1924 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1928 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1932 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1936 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1956 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1960 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1964 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:1968 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:1988 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:1992 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:1996 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2000 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2020 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2024 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2028 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2032 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2052 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2056 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2060 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2064 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2084 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2088 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2092 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2096 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2148 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2152 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2156 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2160 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2180 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2184 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2188 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2192 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2212 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2216 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2220 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2224 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2244 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2248 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2252 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2256 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2276 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2280 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2284 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2288 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2308 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2312 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2316 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2320 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2340 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2344 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2348 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2352 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2372 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2376 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2380 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2384 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2452 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2456 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2460 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2464 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2484 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2488 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2492 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2496 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2516 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2520 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2524 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2528 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2548 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2552 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2556 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2560 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2580 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2584 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2588 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2592 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2612 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2616 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2620 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2624 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2644 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2648 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2652 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2656 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2676 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2680 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2684 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2688 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2740 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2744 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2748 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2752 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2772 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2776 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2780 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2784 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2804 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2808 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2812 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2816 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2836 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2840 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2844 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2848 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2868 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2872 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2876 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2880 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2900 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2904 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2908 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2912 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2932 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2936 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2940 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2944 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:2964 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:2968 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:2972 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:2976 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3044 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3048 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3052 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3056 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3076 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3080 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3084 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3088 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3108 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3112 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3116 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3120 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3140 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3144 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3148 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3152 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3172 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3176 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3180 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3184 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3204 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3208 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3212 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3216 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3236 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3240 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3244 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3248 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3268 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3272 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3276 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3280 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3332 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3336 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3340 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3344 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3364 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3368 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3372 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3376 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3396 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3400 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3404 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3408 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3428 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3432 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3436 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3440 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3460 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3464 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3468 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3472 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3492 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3496 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3500 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3504 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v7, off, s[44:47], 0 offset:3524 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v8, off, s[44:47], 0 offset:3528 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[44:47], 0 offset:3532 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[44:47], 0 offset:3536 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[40:43], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(3)
+; GFX6-NEXT:    v_add_i32_e32 v7, vcc, s0, v5
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3556 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3560 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3564 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3568 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:16 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:16
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:20 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:24 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:28 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:32 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:32
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:36 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:40 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:44 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:48 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:48
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:52 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:56 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:60 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:64 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:64
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:68 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:72 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:76 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:80 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:80
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:84 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:88 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:92 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:96 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:96
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:100 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:104 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:108 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:112 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:112
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:116 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:120 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:124 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:128 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:128
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:132 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:136 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:140 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:144 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:144
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:148 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:152 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:156 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:160 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:160
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:164 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:168 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:172 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:176 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:176
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:180 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:184 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:188 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:192 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:192
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:196 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:200 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:204 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:208 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:208
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:212 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:216 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:220 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:224 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:224
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:228 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:232 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:236 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:240 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:240
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:244 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:248 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:252 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:256 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:256
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:260 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:264 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:268 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:272 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:272
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:276 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:280 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:284 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:288 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:288
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:292 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:296 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:300 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:304 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:304
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:308 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:312 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:316 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:320 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:320
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:324 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:328 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:332 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:336 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:336
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:340 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:344 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:348 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:352 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:352
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:356 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:360 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:364 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:368 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:368
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:372 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:376 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:380 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:384 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:384
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:388 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:392 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:396 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:400 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:404 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:408 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:412 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:416 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:416
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:420 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:424 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:428 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:432 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:432
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:436 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:440 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:444 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:448 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:448
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:452 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:456 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:460 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:464 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:464
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:468 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:472 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:476 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:480 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:480
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:484 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:488 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:492 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:496 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:496
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:500 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:504 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:508 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:512 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:512
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:516 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:520 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:524 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:528 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:528
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:532 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:536 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:540 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:544 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:544
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:548 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:552 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:556 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:560 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:560
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:564 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:568 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:572 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:576 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:576
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:580 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:584 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:588 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:592 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:592
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:596 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:600 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:604 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:608 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:608
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:612 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:616 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:620 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:624 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:624
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:628 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:632 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:636 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:640 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:640
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:644 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:648 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:652 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:656 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:656
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:660 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:664 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:668 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:672 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:672
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:676 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:680 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:684 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:688 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:688
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:692 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:696 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:700 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:704 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:704
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:708 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:712 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:716 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:720 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:720
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:724 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:728 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:732 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:736 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:736
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:740 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:744 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:748 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:752 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:752
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:756 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:760 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:764 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:768 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:768
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:772 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:776 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:780 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:784 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:784
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:788 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:792 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:796 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:800 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:804 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:808 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:812 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:816 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:816
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:820 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:824 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:828 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:832 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:832
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:836 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:840 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:844 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:848 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:848
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:852 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:856 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:860 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:864 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:864
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:868 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:872 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:876 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:880 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:880
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:884 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:888 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:892 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:896 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:896
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:900 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:904 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:908 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:912 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:912
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:916 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:920 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:924 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:928 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:928
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:932 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:936 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:940 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:944 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:944
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:948 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:952 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:956 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:960 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:960
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:964 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:968 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:972 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:976 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:976
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:980 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:984 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:988 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:992 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:992
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:996 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1000 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1004 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1008 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1008
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1012 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1016 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1020 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1024 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1024
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1028 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1032 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1036 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1040 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1040
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1044 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1048 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1052 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1056 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1056
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1060 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1064 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1068 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1072 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1072
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1076 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1080 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1084 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1088 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1088
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1092 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1096 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1100 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1104 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1104
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1108 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1112 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1116 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1120 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1120
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1124 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1128 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1132 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1136 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1136
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1140 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1144 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1148 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1152 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1152
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1156 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1160 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1164 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1168 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1168
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1172 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1176 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1180 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1184 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1184
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1188 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1192 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1196 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1200 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1200
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1204 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1208 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1212 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1216 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1216
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1220 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1224 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1228 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1232 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1232
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1236 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1240 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1244 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1248 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1248
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1252 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1256 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1260 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1264 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1264
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1284 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1288 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1292 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1296 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1280
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1316 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1320 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1324 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1328 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1296
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1348 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1352 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1356 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1360 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1312
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1380 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1384 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1388 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1392 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1328
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1412 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1416 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1420 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1424 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1344
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1444 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1448 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1452 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1456 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1360
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1476 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1480 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1484 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1488 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1376
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1508 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1512 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1516 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1520 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1392
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1524 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1528 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1532 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1536 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1408
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1540 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1544 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1548 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1552 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1424
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1572 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1576 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1580 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1584 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1440
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1604 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1608 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1612 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1616 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1456
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1636 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1640 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1644 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1648 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1472
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1668 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1672 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1676 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1680 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1488
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1700 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1704 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1708 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1712 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1504
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1732 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1736 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1740 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1744 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1520
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1764 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1768 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1772 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1776 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1536
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1796 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1800 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1804 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1808 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1552
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1812 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1816 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1820 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1824 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1568
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1828 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1832 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1836 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1840 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1584
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1844 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1848 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1852 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1856 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1600
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1876 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1880 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1884 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1888 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1616
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1908 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1912 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1916 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1920 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1632
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1940 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1944 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1948 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1952 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1648
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:1972 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:1976 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:1980 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:1984 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1664
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2004 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2008 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2012 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2016 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1680
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2036 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2040 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2044 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2048 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1696
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2068 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2072 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2076 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2080 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1712
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2100 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2104 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2108 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2112 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1728
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2116 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2120 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2124 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2128 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1744
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2132 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2136 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2140 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2144 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1760
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2164 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2168 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2172 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2176 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1776
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2196 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2200 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2204 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2208 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1792
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2228 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2232 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2236 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2240 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1808
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2260 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2264 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2268 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2272 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1824
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2292 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2296 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2300 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2304 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1840
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2324 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2328 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2332 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2336 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1856
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2356 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2360 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2364 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2368 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1872
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2388 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2392 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2396 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2400 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1888
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2404 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2408 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2412 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2416 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1904
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2420 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2424 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2428 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2432 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1920
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2436 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2440 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2444 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2448 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1936
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2468 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2472 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2476 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2480 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1952
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2500 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2504 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2508 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2512 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1968
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2532 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2536 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2540 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2544 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1984
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2564 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2568 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2572 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2576 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2596 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2600 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2604 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2608 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2016
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2628 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2632 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2636 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2640 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2032
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2660 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2664 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2668 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2672 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2048
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2692 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2696 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2700 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2704 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2064
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2708 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2712 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2716 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2720 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2080
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2724 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2728 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2732 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2736 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2096
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2756 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2760 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2764 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2768 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2112
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2788 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2792 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2796 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2800 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2128
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2820 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2824 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2828 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2832 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2144
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2852 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2856 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2860 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2864 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2160
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2884 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2888 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2892 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2896 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2176
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2916 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2920 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2924 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2928 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2192
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2948 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2952 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2956 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2960 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2208
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2980 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:2984 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:2988 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:2992 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2224
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:2996 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3000 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3004 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3008 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2240
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3012 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3016 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3020 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3024 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2256
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3028 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3032 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3036 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3040 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2272
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3060 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3064 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3068 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3072 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2288
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3092 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3096 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3100 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3104 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2304
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3124 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3128 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3132 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3136 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2320
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3156 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3160 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3164 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3168 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2336
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3188 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3192 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3196 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3200 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2352
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3220 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3224 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3228 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3232 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2368
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3252 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3256 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3260 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3264 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2384
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3284 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3288 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3292 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3296 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3300 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3304 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3308 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3312 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2416
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3316 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3320 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3324 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3328 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2432
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3348 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3352 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3356 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3360 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2448
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3380 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3384 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3388 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3392 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2464
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3412 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3416 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3420 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3424 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2480
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3444 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3448 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3452 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3456 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2496
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3476 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3480 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3484 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3488 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2512
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3508 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3512 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3516 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3520 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2528
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3540 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3544 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3548 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3552 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2544
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3572 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3576 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3580 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3584 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2560
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3588 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3592 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3596 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3600 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2576
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3604 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3608 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3612 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3616 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2592
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3620 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3624 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3628 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3632 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2608
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3636 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3640 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3644 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3648 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2624
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3652 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3656 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3660 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3664 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2640
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3668 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3672 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3676 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3680 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2656
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3684 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3688 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3692 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3696 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2672
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3700 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3704 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3708 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3712 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2688
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3716 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3720 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3724 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3728 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2704
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3732 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3736 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3740 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3744 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2720
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3748 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3752 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3756 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3760 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2736
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3764 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3768 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3772 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3776 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2752
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3780 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3784 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3788 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3792 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2768
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3796 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3800 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3804 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3808 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2784
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3812 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3816 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3820 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3824 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3828 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3832 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3836 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3840 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2816
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3844 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3848 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3852 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3856 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2832
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3860 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3864 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3868 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3872 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2848
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3876 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3880 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3884 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3888 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2864
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3892 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3896 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3900 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3904 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2880
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3908 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3912 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3916 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3920 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2896
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3924 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3928 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3932 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3936 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2912
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3940 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3944 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3948 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3952 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2928
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3956 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3960 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3964 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3968 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2944
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3972 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3976 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3980 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:3984 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2960
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:3988 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:3992 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:3996 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4000 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2976
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4004 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:4008 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:4012 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4016 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2992
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4020 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:4024 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:4028 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4032 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3008
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4036 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:4040 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:4044 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4048 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3024
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4052 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:4056 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:4060 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4064 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3040
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], 0 offset:4068 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], 0 offset:4072 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], 0 offset:4076 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], 0 offset:4080 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3056
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3072
+; GFX6-NEXT:    s_mov_b32 s2, 0x40100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3088
+; GFX6-NEXT:    s_mov_b32 s2, 0x40500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3104
+; GFX6-NEXT:    s_mov_b32 s2, 0x40900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3120
+; GFX6-NEXT:    s_mov_b32 s2, 0x40d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3136
+; GFX6-NEXT:    s_mov_b32 s2, 0x41100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3152
+; GFX6-NEXT:    s_mov_b32 s2, 0x41500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3168
+; GFX6-NEXT:    s_mov_b32 s2, 0x41900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3184
+; GFX6-NEXT:    s_mov_b32 s2, 0x41d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3200
+; GFX6-NEXT:    s_mov_b32 s2, 0x42100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3216
+; GFX6-NEXT:    s_mov_b32 s2, 0x42500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3232
+; GFX6-NEXT:    s_mov_b32 s2, 0x42900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3248
+; GFX6-NEXT:    s_mov_b32 s2, 0x42d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3264
+; GFX6-NEXT:    s_mov_b32 s2, 0x43100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3280
+; GFX6-NEXT:    s_mov_b32 s2, 0x43500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3296
+; GFX6-NEXT:    s_mov_b32 s2, 0x43900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3312
+; GFX6-NEXT:    s_mov_b32 s2, 0x43d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3328
+; GFX6-NEXT:    s_mov_b32 s2, 0x44100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3344
+; GFX6-NEXT:    s_mov_b32 s2, 0x44500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3360
+; GFX6-NEXT:    s_mov_b32 s2, 0x44900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3376
+; GFX6-NEXT:    s_mov_b32 s2, 0x44d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3392
+; GFX6-NEXT:    s_mov_b32 s2, 0x45100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3408
+; GFX6-NEXT:    s_mov_b32 s2, 0x45500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3424
+; GFX6-NEXT:    s_mov_b32 s2, 0x45900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3440
+; GFX6-NEXT:    s_mov_b32 s2, 0x45d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3456
+; GFX6-NEXT:    s_mov_b32 s2, 0x46100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3472
+; GFX6-NEXT:    s_mov_b32 s2, 0x46500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3488
+; GFX6-NEXT:    s_mov_b32 s2, 0x46900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3504
+; GFX6-NEXT:    s_mov_b32 s2, 0x46d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3520
+; GFX6-NEXT:    s_mov_b32 s2, 0x47100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3536
+; GFX6-NEXT:    s_mov_b32 s2, 0x47500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3552
+; GFX6-NEXT:    s_mov_b32 s2, 0x47900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3568
+; GFX6-NEXT:    s_mov_b32 s2, 0x47d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3584
+; GFX6-NEXT:    s_mov_b32 s2, 0x48100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3600
+; GFX6-NEXT:    s_mov_b32 s2, 0x48500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3616
+; GFX6-NEXT:    s_mov_b32 s2, 0x48900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3632
+; GFX6-NEXT:    s_mov_b32 s2, 0x48d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3648
+; GFX6-NEXT:    s_mov_b32 s2, 0x49100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3664
+; GFX6-NEXT:    s_mov_b32 s2, 0x49500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3680
+; GFX6-NEXT:    s_mov_b32 s2, 0x49900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3696
+; GFX6-NEXT:    s_mov_b32 s2, 0x49d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3712
+; GFX6-NEXT:    s_mov_b32 s2, 0x4a100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3728
+; GFX6-NEXT:    s_mov_b32 s2, 0x4a500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3744
+; GFX6-NEXT:    s_mov_b32 s2, 0x4a900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3760
+; GFX6-NEXT:    s_mov_b32 s2, 0x4ad00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3776
+; GFX6-NEXT:    s_mov_b32 s2, 0x4b100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3792
+; GFX6-NEXT:    s_mov_b32 s2, 0x4b500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3808
+; GFX6-NEXT:    s_mov_b32 s2, 0x4b900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3824
+; GFX6-NEXT:    s_mov_b32 s2, 0x4bd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3840
+; GFX6-NEXT:    s_mov_b32 s2, 0x4c100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3856
+; GFX6-NEXT:    s_mov_b32 s2, 0x4c500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3872
+; GFX6-NEXT:    s_mov_b32 s2, 0x4c900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3888
+; GFX6-NEXT:    s_mov_b32 s2, 0x4cd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3904
+; GFX6-NEXT:    s_mov_b32 s2, 0x4d100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3920
+; GFX6-NEXT:    s_mov_b32 s2, 0x4d500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3936
+; GFX6-NEXT:    s_mov_b32 s2, 0x4d900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3952
+; GFX6-NEXT:    s_mov_b32 s2, 0x4dd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3968
+; GFX6-NEXT:    s_mov_b32 s2, 0x4e100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3984
+; GFX6-NEXT:    s_mov_b32 s2, 0x4e500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4000
+; GFX6-NEXT:    s_mov_b32 s2, 0x4e900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4016
+; GFX6-NEXT:    s_mov_b32 s2, 0x4ed00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4032
+; GFX6-NEXT:    s_mov_b32 s2, 0x4f100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4048
+; GFX6-NEXT:    s_mov_b32 s2, 0x4f500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4064
+; GFX6-NEXT:    s_mov_b32 s2, 0x4f900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4080
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3556 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3560 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3564 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3568 ; 4-byte Folded Reload
+; GFX6-NEXT:    v_mov_b32_e32 v4, s1
+; GFX6-NEXT:    v_addc_u32_e32 v8, vcc, 0, v4, vcc
+; GFX6-NEXT:    s_mov_b64 s[2:3], s[18:19]
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3524 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3528 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3532 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3536 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3492 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3496 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3500 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3504 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3460 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3464 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3468 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3472 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3428 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3432 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3436 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3440 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3396 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3400 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3404 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3408 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3364 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3368 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3372 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3376 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3332 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3336 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3340 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3344 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3268 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3272 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3276 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3280 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3236 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3240 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3244 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3248 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3204 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3208 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3212 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3216 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3172 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3176 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3180 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3184 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3140 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3144 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3148 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3152 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3108 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3112 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3116 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3120 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3076 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3080 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3084 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3088 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:3044 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:3048 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:3052 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:3056 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2964 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2968 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2972 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2976 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2932 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2936 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2940 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2944 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2900 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2904 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2908 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2912 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2868 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2872 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2876 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2880 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2836 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2840 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2844 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2848 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2804 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2808 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2812 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2816 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2772 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2776 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2780 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2784 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2740 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2744 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2748 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2752 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2676 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2680 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2684 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2688 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2644 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2648 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2652 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2656 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2612 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2616 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2620 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2624 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2580 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2584 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2588 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2592 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2548 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2552 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2556 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2560 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2516 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2520 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2524 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2528 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2484 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2488 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2492 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2496 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2452 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2456 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2460 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2464 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2372 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2376 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2380 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2384 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2340 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2344 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2348 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2352 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2308 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2312 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2316 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2320 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2276 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2280 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2284 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2288 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2244 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2248 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2252 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2256 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2212 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2216 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2220 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2224 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2180 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2184 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2188 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2192 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2148 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2152 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2156 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2160 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2084 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2088 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2092 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2096 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2052 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2056 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2060 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2064 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:2020 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:2024 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:2028 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2032 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1988 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1992 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1996 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:2000 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1956 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1960 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1964 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1968 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1924 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1928 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1932 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1936 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1892 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1896 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1900 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1904 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1860 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1864 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1868 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1872 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1780 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1784 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1788 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1792 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1748 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1752 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1756 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1760 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1716 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1720 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1724 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1728 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1684 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1688 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1692 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1696 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1652 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1656 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1660 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1664 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1620 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1624 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1628 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1632 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1588 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1592 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1596 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1600 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1556 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1560 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1564 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1568 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1492 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1496 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1500 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1504 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1460 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1464 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1468 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1472 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1428 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1432 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1436 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1440 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1396 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1400 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1404 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1408 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1364 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1368 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1372 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1376 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1332 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1336 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1340 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1344 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1300 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1304 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1308 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1312 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[44:47], 0 offset:1268 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v10, off, s[44:47], 0 offset:1272 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v11, off, s[44:47], 0 offset:1276 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v12, off, s[44:47], 0 offset:1280 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:3968
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4080
+; GFX6-NEXT:    s_mov_b32 s4, 0x4f900
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4f500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4f100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4ed00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4e900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4e500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4e100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4dd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4d900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3952
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4d500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3936
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4d100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3920
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4cd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3904
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4c900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3888
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4c500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3872
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4c100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3856
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4bd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3840
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4b900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3824
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4b500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3808
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4b100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3792
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4ad00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3776
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4a900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3760
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4a500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3744
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x4a100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3728
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x49d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3712
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x49900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3696
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x49500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3680
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x49100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3664
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x48d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3648
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x48900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3632
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x48500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3616
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x48100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3600
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x47d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3584
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x47900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3568
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x47500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3552
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x47100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3536
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x46d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3520
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x46900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3504
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x46500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3488
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x46100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3472
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x45d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3456
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x45900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3440
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x45500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3424
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x45100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3408
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x44d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3392
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x44900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3376
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x44500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3360
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x44100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3344
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x43d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3328
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x43900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3312
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x43500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3296
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x43100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3280
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x42d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3264
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x42900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3248
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x42500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3232
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x42100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3216
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x41d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3200
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x41900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3184
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x41500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3168
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x41100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3152
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x40d00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3136
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x40900
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3120
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x40500
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3104
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x40100
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3088
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x3fd00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3072
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3056
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4068 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:4072 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:4076 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4080 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3040
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4052 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:4056 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:4060 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4064 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3024
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4036 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:4040 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:4044 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4048 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3008
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4020 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:4024 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:4028 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4032 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2992
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4004 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:4008 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:4012 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4016 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2976
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3988 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3992 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3996 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:4000 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2960
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3972 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3976 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3980 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3984 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2944
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3956 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3960 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3964 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3968 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2928
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3940 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3944 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3948 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3952 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2912
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3924 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3928 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3932 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3936 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2896
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3908 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3912 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3916 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3920 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2880
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3892 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3896 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3900 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3904 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2864
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3876 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3880 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3884 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3888 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2848
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3860 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3864 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3868 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3872 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2832
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3844 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3848 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3852 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3856 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2816
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3828 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3832 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3836 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3840 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2800
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3812 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3816 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3820 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3824 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2784
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3796 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3800 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3804 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3808 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2768
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3780 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3784 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3788 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3792 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2752
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3764 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3768 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3772 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3776 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2736
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3748 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3752 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3756 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3760 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2720
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3732 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3736 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3740 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3744 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2704
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3716 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3720 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3724 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3728 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2688
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3700 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3704 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3708 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3712 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2672
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3684 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3688 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3692 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3696 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2656
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3668 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3672 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3676 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3680 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2640
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3652 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3656 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3660 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3664 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2624
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3636 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3640 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3644 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3648 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2608
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3620 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3624 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3628 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3632 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2592
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3604 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3608 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3612 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3616 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2576
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3588 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3592 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3596 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3600 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2560
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3572 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3576 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3580 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3584 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2544
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3540 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3544 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3548 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3552 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2528
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3508 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3512 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3516 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3520 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2512
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3476 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3480 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3484 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3488 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2496
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3444 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3448 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3452 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3456 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2480
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3412 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3416 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3420 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3424 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2464
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3380 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3384 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3388 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3392 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2448
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3348 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3352 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3356 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3360 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2432
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3316 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3320 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3324 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3328 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2416
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3300 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3304 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3308 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3312 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2400
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3284 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3288 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3292 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3296 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2384
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3252 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3256 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3260 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3264 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2368
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3220 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3224 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3228 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3232 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2352
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3188 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3192 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3196 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3200 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2336
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3156 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3160 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3164 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3168 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2320
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3124 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3128 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3132 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3136 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2304
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3092 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3096 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3100 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3104 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2288
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3060 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3064 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3068 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3072 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2272
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3028 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3032 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3036 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3040 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2256
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:3012 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3016 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3020 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3024 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2240
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2996 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:3000 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:3004 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:3008 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2224
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2980 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2984 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2988 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2992 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2208
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2948 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2952 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2956 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2960 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2192
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2916 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2920 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2924 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2928 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2176
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2884 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2888 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2892 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2896 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2160
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2852 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2856 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2860 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2864 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2144
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2820 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2824 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2828 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2832 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2128
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2788 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2792 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2796 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2800 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2112
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2756 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2760 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2764 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2768 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2096
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2724 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2728 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2732 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2736 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2080
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2708 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2712 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2716 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2720 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2064
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2692 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2696 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2700 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2704 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2048
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2660 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2664 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2668 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2672 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2032
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2628 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2632 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2636 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2640 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2016
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2596 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2600 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2604 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2608 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2000
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2564 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2568 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2572 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2576 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1984
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2532 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2536 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2540 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2544 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1968
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2500 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2504 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2508 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2512 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1952
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2468 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2472 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2476 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2480 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1936
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2436 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2440 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2444 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2448 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1920
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2420 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2424 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2428 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2432 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1904
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2404 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2408 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2412 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2416 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1888
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2388 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2392 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2396 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2400 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1872
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2356 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2360 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2364 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2368 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1856
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2324 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2328 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2332 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2336 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1840
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2292 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2296 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2300 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2304 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1824
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2260 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2264 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2268 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2272 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1808
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2228 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2232 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2236 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2240 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1792
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2196 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2200 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2204 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2208 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1776
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2164 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2168 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2172 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2176 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1760
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2132 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2136 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2140 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2144 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1744
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2116 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2120 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2124 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2128 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1728
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2100 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2104 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2108 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2112 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1712
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2068 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2072 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2076 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2080 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1696
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2036 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2040 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2044 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2048 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1680
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:2004 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:2008 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:2012 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:2016 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1664
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1972 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1976 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1980 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1984 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1648
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1940 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1944 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1948 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1952 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1632
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1908 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1912 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1916 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1920 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1616
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1876 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1880 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1884 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1888 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1600
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1844 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1848 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1852 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1856 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1584
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1828 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1832 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1836 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1840 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1568
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1812 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1816 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1820 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1824 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1552
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1796 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1800 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1804 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1808 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1536
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1764 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1768 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1772 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1776 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1520
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1732 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1736 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1740 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1744 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1504
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1700 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1704 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1708 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1712 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1488
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1668 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1672 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1676 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1680 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1472
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1636 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1640 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1644 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1648 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1456
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1604 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1608 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1612 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1616 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1440
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1572 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1576 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1580 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1584 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1424
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1540 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1544 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1548 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1552 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1408
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1524 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1528 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1532 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1536 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1392
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1508 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1512 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1516 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1520 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1376
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1476 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1480 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1484 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1488 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1360
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1444 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1448 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1452 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1456 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1344
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1412 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1416 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1420 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1424 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1328
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1380 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1384 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1388 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1392 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1312
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1348 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1352 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1356 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1360 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1296
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1316 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1320 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1324 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1328 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1280
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1284 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1288 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1292 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1296 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1264
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1252 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1256 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1260 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1264 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1248
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1236 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1240 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1244 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1248 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1232
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1220 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1224 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1228 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1232 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1216
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1204 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1208 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1212 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1216 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1200
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1188 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1192 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1196 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1200 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1184
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1172 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1176 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1180 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1184 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1168
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1156 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1160 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1164 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1168 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1152
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1140 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1144 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1148 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1152 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1136
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1124 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1128 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1132 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1136 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1120
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1108 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1112 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1116 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1120 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1104
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1092 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1096 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1100 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1104 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1088
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1076 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1080 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1084 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1088 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1072
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1060 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1064 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1068 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1072 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1056
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1044 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1048 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1052 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1056 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1040
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1028 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1032 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1036 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1040 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1024
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:1012 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1016 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1020 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1024 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1008
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:996 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:1000 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:1004 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:1008 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:992
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:980 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:984 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:988 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:992 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:976
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:964 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:968 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:972 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:976 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:960
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:948 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:952 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:956 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:960 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:944
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:932 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:936 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:940 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:944 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:928
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:916 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:920 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:924 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:928 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:912
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:900 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:904 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:908 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:912 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:896
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:884 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:888 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:892 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:896 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:880
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:868 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:872 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:876 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:880 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:864
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:852 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:856 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:860 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:864 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:848
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:836 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:840 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:844 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:848 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:832
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:820 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:824 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:828 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:832 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:816
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:804 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:808 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:812 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:816 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:800
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:788 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:792 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:796 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:800 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:784
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:772 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:776 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:780 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:784 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:768
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:756 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:760 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:764 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:768 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:752
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:740 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:744 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:748 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:752 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:736
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:724 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:728 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:732 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:736 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:720
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:708 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:712 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:716 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:720 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:704
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:692 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:696 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:700 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:704 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:688
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:676 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:680 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:684 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:688 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:672
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:660 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:664 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:668 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:672 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:656
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:644 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:648 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:652 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:656 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:640
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:628 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:632 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:636 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:640 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:624
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:612 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:616 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:620 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:624 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:608
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:596 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:600 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:604 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:608 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:592
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:580 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:584 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:588 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:592 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:576
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:564 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:568 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:572 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:576 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:560
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:548 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:552 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:556 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:560 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:544
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:532 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:536 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:540 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:544 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:528
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:516 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:520 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:524 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:528 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:512
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:500 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:504 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:508 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:512 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:496
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:484 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:488 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:492 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:496 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:480
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:468 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:472 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:476 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:480 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:464
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:452 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:456 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:460 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:464 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:448
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:436 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:440 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:444 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:448 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:432
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:420 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:424 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:428 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:432 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:416
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:404 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:408 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:412 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:416 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:400
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:388 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:392 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:396 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:400 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:384
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:372 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:376 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:380 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:384 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:368
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:356 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:360 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:364 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:368 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:352
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:340 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:344 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:348 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:352 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:336
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:324 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:328 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:332 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:336 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:320
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:308 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:312 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:316 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:320 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:304
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:292 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:296 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:300 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:304 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:288
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:276 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:280 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:284 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:288 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:272
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:260 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:264 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:268 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:272 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:256
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:244 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:248 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:252 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:256 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:240
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:228 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:232 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:236 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:240 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:224
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:212 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:216 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:220 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:224 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:208
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:196 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:200 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:204 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:208 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:192
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:180 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:184 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:188 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:192 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:176
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:164 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:168 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:172 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:176 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:160
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:148 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:152 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:156 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:160 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:144
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:132 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:136 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:140 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:144 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:128
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:116 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:120 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:124 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:128 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:112
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:100 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:104 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:108 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:112 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:96
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:84 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:88 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:92 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:96 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:80
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:68 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:72 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:76 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:80 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:64
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:52 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:56 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:60 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:64 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:48
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:36 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:40 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:44 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:48 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:32
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:20 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:24 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:28 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:32 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:16
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[44:47], 0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[44:47], 0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[44:47], 0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[44:47], 0 offset:16 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64
+; GFX6-NEXT:    s_endpgm
+;
+; GFX9-FLATSCR-LABEL: test:
+; GFX9-FLATSCR:       ; %bb.0: ; %entry
+; GFX9-FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
+; GFX9-FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-FLATSCR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-FLATSCR-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
+; GFX9-FLATSCR-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v5, 13, v0
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x80
+; GFX9-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v2, vcc, s2, v5
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, s3
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v0, vcc
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0x84
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x104
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x184
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x204
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x284
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x304
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x384
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 20
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 36
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 52
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x100
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0x94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0x180
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s5, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x114
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x124
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x134
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x144
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x154
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x164
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x174
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x200
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x194
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x280
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s7, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x214
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x224
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x234
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x244
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x254
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x264
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x274
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x300
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s8, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x294
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x380
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s9, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x314
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x324
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x334
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x344
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x354
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x364
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x374
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x400
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s10, v2
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x394
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3]
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x404
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, s1
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:16
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x414
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:32
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x424
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:48
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x434
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:64
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x444
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:80
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x454
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:96
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x464
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:112
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x474
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:128
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x484
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:144
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x494
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:160
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:176
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:192
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:208
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:224
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:240
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:256
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x504
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:272
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x514
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:288
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x524
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:304
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x534
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:320
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x544
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:336
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x554
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:352
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x564
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:368
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x574
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:384
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x584
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:400
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x594
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:416
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:432
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:448
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:464
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:480
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:496
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:512
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x604
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:528
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x614
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:544
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x624
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:560
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x634
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:576
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x644
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:592
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x654
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:608
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x664
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:624
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x674
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:640
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x684
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:656
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x694
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:672
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:688
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:704
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:720
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:736
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:752
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:768
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x704
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:784
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x714
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:800
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x724
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:816
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x734
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:832
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x744
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:848
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x754
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:864
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x764
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:880
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x774
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:896
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x784
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:912
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x794
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:928
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:944
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:960
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:976
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:992
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x804
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x814
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x824
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x834
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x844
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x854
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x864
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x874
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x884
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x894
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x904
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x914
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x924
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x934
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x944
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x954
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x964
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x974
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x984
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x994
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xaa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xab4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xac4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xad4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xae4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xaf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xba4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2096
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2112
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2128
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2144
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2160
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2176
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2192
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2208
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xca4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2224
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2240
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2256
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2272
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xce4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2288
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2304
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2320
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2336
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2352
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2368
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2384
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2400
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2416
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2432
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2448
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2464
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xda4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2480
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2496
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2512
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2528
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xde4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2544
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2560
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2576
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2592
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2608
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2624
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2640
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2656
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2672
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2688
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2704
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2720
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xea4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2736
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xeb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2752
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xec4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2768
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xed4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2784
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xee4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2800
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xef4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2816
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2832
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2848
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2864
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2880
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2896
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2912
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2928
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2944
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2960
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2976
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2992
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3008
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3024
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3040
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3056
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xff4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3072
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1004
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3088
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1014
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3104
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1024
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3120
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1034
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3136
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1044
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3152
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1054
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3168
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1064
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3184
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1074
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3200
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1084
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3216
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1094
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3232
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3248
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3264
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3280
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3296
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3312
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3328
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1104
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3344
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1114
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3360
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1124
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3376
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1134
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3392
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1144
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3408
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1154
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3424
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1164
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3440
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1174
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3456
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1184
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3472
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1194
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3488
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3504
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3520
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3536
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3552
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3568
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3584
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1204
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3600
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1214
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3616
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1224
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3632
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1234
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3648
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1244
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3664
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1254
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3680
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1264
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3696
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1274
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3712
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1284
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3728
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1294
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3744
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3760
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3776
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3792
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3808
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3824
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3840
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1304
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3856
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1314
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3872
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1324
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3888
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1334
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3904
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1344
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3920
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1354
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3936
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1364
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3952
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1374
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3968
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1384
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1394
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4000
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4016
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4032
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4048
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4080
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x13e4
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v4, vcc, s0, v5
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x13d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x13c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x13b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x13a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1394
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1384
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1374
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1364
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3952
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1354
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3936
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1344
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3920
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1334
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3904
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1324
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3888
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1314
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3872
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1304
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3856
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3840
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3824
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3808
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3792
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3776
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x12a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3760
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1294
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3744
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1284
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3728
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1274
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3712
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1264
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3696
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1254
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3680
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1244
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3664
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1234
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3648
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1224
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3632
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1214
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3616
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1204
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3600
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3584
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3568
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3552
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3536
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3520
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x11a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3504
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1194
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3488
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1184
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3472
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1174
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3456
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1164
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3440
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1154
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3424
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1144
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3408
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1134
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3392
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1124
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3376
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1114
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3360
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1104
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3344
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3328
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3312
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3296
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3280
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3264
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x10a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3248
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1094
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3232
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1084
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3216
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1074
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3200
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1064
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3184
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1054
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3168
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1044
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3152
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1034
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3136
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1024
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3120
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1014
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3104
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x1004
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3088
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xff4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3072
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xfe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3056
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xfd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3040
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xfc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3024
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xfb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:3008
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xfa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2992
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2976
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2960
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2944
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2928
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2912
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2896
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2880
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2864
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2848
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xf04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2832
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xef4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2816
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xee4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2800
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xed4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2784
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xec4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2768
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xeb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2752
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xea4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2736
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2720
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2704
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2688
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2672
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2656
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2640
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2624
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2608
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2592
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xe04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2576
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xdf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2560
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xde4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2544
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xdd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2528
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xdc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2512
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xdb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2496
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xda4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2480
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2464
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2448
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2432
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2416
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2400
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2384
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2368
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2352
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2336
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xd04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2320
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xcf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2304
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xce4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2288
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xcd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2272
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xcc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2256
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xcb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2240
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xca4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2224
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2208
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2192
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2176
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2160
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2144
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2128
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2112
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2096
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xc04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xbf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xbe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xbd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xbc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xbb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xba4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1952
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1936
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1920
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1904
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1888
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1872
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1856
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1840
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1824
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xb04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1808
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xaf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1792
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xae4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1776
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xad4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1760
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xac4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1744
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xab4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1728
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xaa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1712
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1696
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1680
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1664
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1648
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1632
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1616
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa34
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1600
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa24
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1584
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa14
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1568
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0xa04
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1552
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1536
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1520
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1504
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1488
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1472
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x9a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1456
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x994
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1440
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x984
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1424
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x974
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1408
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x964
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1392
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x954
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1376
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x944
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1360
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x934
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1344
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x924
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1328
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x914
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1312
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x904
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1296
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1280
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1264
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1248
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1232
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1216
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x8a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1200
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x894
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1184
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x884
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1168
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x874
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1152
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x864
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1136
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x854
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1120
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x844
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1104
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x834
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1088
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x824
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1072
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x814
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1056
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x804
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1040
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1024
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1008
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:992
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:976
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:960
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x7a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:944
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x794
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:928
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x784
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:912
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x774
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:896
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x764
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:880
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x754
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:864
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x744
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:848
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x734
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:832
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x724
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:816
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x714
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:800
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x704
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:784
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:768
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:752
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:736
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:720
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:704
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x6a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:688
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x694
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:672
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x684
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:656
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x674
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:640
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x664
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:624
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x654
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:608
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x644
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:592
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x634
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:576
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x624
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:560
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x614
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:544
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x604
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:528
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:512
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:496
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:480
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:464
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:448
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x5a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:432
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x594
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:416
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x584
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:400
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x574
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:384
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x564
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:368
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x554
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:352
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x544
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:336
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x534
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:320
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x524
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:304
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x514
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:288
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x504
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:272
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:256
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:240
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:224
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:208
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:192
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x4a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:176
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x494
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:160
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x484
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:144
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x474
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:128
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x464
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:112
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x454
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:96
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x444
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:80
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x434
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:64
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x424
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:48
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x414
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:32
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x404
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:16
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3f4
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s10, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x394
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x384
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x374
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s9, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x364
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x354
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x344
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x334
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x324
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x314
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x304
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s8, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x294
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x284
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x274
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s7, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x264
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x254
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x244
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x234
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x224
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x214
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x204
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1f4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1e4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1d4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1c4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1b4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1a4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x194
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x184
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x174
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s5, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x164
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x154
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x144
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x134
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x124
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x114
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x104
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xf4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xe4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xd4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xc4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xb4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xa4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x94
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x84
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x74
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x80, v4
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x64
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4080
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x54
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4064
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x44
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4048
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 52
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4032
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 36
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4016
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 20
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:4000
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s0, 4
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:3984
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_endpgm
+;
+; GFX10-FLATSCR-LABEL: test:
+; GFX10-FLATSCR:       ; %bb.0: ; %entry
+; GFX10-FLATSCR-NEXT:    s_add_u32 s2, s2, s5
+; GFX10-FLATSCR-NEXT:    s_addc_u32 s3, s3, 0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-FLATSCR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-FLATSCR-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
+; GFX10-FLATSCR-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v5, 13, v0
+; GFX10-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v4, s4, s2, v5
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e64 v22, null, s3, 0, s4
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x804
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x80, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v2, vcc_lo, 0x100, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:20 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:36 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:52 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:68 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:84 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:100 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:116 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:132 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:148 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:164 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:180 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:196 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:212 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:228 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[2:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], off offset:244 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v6, vcc_lo, 0x180, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:260 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:276 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:292 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:308 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:324 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:340 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:356 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v[6:7], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], off offset:372 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v8, vcc_lo, 0x200, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:388 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:404 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:420 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:436 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:452 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:468 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:484 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[10:13], v[8:9], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[10:13], off offset:500 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v10, vcc_lo, 0x280, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v11, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:516 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:532 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:548 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:564 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:580 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:596 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:612 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v[10:11], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[12:15], off offset:628 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v12, vcc_lo, 0x300, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:644 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:660 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:676 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:692 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:708 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:724 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:740 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[14:17], v[12:13], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[14:17], off offset:756 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v14, vcc_lo, 0x380, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v15, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:772 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:788 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:804 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:820 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:836 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:852 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:868 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v[14:15], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], off offset:884 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v16, vcc_lo, 0x400, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:900 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:916 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:932 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:948 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:964 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:980 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:996 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[18:21], v[16:17], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[18:21], off offset:1012 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v18, vcc_lo, 0x480, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v19, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x500, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1028 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1044 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1060 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1076 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1092 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1108 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1124 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[18:19], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1140 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1156 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1172 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1188 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1204 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1220 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1236 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1252 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x580, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1268 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1284 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1300 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1316 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1332 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1348 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1364 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1380 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x600, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1396 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1412 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1428 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1444 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1460 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1476 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1492 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1508 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x680, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1524 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1540 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1556 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1572 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1588 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1604 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1620 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1636 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x700, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1652 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1668 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1684 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1700 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1716 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1732 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1748 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1764 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[23:26], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v20, vcc_lo, 0x780, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v0
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[23:26], off offset:1780 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1796 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1812 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1828 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1844 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1860 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1876 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[22:25], off offset:1892 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[20:21], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1908 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1924 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1940 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1956 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1972 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:1988 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:2004 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:2020 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v2
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], off offset:2036 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x814
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x824
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x834
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x844
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x854
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x864
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x874
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v6
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v7, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x884
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x894
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x8f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v8
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v9, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x904
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x914
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x924
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x934
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x944
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x954
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x964
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x974
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v10
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v11, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x984
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x994
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x9f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v12
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v13, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa04
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v14
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v15, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa84
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xa94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xaa4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xab4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xac4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xad4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xae4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xaf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v16
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v17, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb04
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x780, v18
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v19, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb84
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xb94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xba4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xbb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xbc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xbd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_clause 0x1
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:2016
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:2032
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xbe4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xbf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3]
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:16
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:32
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:48
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:64
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:80
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:96
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:112
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:128
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:144
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xc94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:160
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xca4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:176
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xcb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:192
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xcc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:208
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xcd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:224
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xce4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:240
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xcf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:256
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:272
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:288
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:304
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:320
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:336
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:352
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:368
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:384
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:400
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xd94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:416
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xda4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:432
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xdb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:448
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xdc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:464
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xdd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:480
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xde4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:496
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xdf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:512
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:528
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:544
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:560
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:576
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:592
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:608
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:624
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:640
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:656
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xe94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:672
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xea4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:688
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xeb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:704
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xec4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:720
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xed4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:736
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xee4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:752
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xef4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:768
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:784
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:800
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:816
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:832
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:848
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:864
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:880
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:896
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:912
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xf94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:928
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xfa4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:944
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xfb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:960
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xfc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:976
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xfd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:992
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xfe4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0xff4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1004
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1014
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1024
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1034
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1044
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1054
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1064
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1074
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1084
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1094
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x10f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1104
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1114
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1124
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1134
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1144
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1154
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1164
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1174
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1184
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1194
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x11f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1204
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1214
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1224
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1234
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1244
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1254
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1264
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1274
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1284
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1294
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x12f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1304
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1314
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1324
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1334
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1344
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1354
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1364
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1374
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1384
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x1394
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x13a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x13b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x13c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x13d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s4, 0x13e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v4, s2, s0, v5
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e64 v6, null, s1, 0, s2
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x13e4
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x13d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x13c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x13b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x13a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1394
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1384
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1374
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1364
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1904
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1354
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1888
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1344
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1872
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1334
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1856
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1324
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1840
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1314
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1824
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1304
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1808
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1792
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1776
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1760
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1744
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1728
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x12a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1712
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1294
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1696
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1284
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1680
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1274
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1664
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1264
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1648
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1254
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1632
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1244
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1616
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1234
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1600
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1224
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1584
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1214
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1568
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1204
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1552
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1536
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1520
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1504
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1488
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1472
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x11a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1456
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1194
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1440
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1184
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1424
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1174
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1408
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1164
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1392
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1154
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1376
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1144
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1360
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1134
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1344
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1124
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1328
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1114
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1312
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1104
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1296
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1280
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10e4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1264
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1248
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1232
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1216
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x10a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1200
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1094
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1184
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1084
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1168
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1074
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1152
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1064
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1136
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1054
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1120
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1044
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1104
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1034
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1088
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1024
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1072
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1014
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1056
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0x1004
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1040
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xff4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1024
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xfe4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:1008
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xfd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:992
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xfc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:976
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xfb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:960
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xfa4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:944
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:928
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:912
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:896
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:880
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:864
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:848
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:832
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:816
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:800
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xf04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:784
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xef4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:768
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xee4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:752
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xed4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:736
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xec4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:720
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xeb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:704
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xea4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:688
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:672
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:656
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:640
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:624
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:608
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:592
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:576
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:560
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:544
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xe04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:528
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xdf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:512
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xde4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:496
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xdd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:480
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xdc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:464
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xdb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:448
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xda4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:432
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:416
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:400
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:384
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:368
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:352
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:336
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:320
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:304
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:288
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xd04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:272
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xcf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:256
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xce4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:240
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xcd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:224
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xcc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:208
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xcb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:192
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xca4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:176
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:160
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:144
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:128
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc64
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:112
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:96
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:80
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:64
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:48
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:32
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s2, 0xc04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] offset:16
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xbf4
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v0, vcc_lo, 0x480, v4
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v2, vcc_lo, 0x780, v0
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xbe4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xbd4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xbc4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xbb4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xba4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[7:10], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v2, vcc_lo, 0x400, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb64
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v7, vcc_lo, 0x780, v2
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, 0, v3, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xb04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xaf4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[9:12], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v7, vcc_lo, 0x380, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xae4
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v9, vcc_lo, 0x780, v7
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, 0, v8, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xad4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xac4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xab4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xaa4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa94
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa84
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa74
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[11:14], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v9, vcc_lo, 0x300, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa64
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v11, vcc_lo, 0x780, v9
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, 0, v10, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa54
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa44
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa34
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa24
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa14
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0xa04
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[13:16], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v11, vcc_lo, 0x280, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9e4
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v13, vcc_lo, 0x780, v11
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v14, vcc_lo, 0, v12, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x9a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x994
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x984
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x974
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[15:18], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v13, vcc_lo, 0x200, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v14, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x964
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v15, vcc_lo, 0x780, v13
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v16, vcc_lo, 0, v14, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x954
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x944
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x934
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x924
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x914
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x904
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8f4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[17:20], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v15, vcc_lo, 0x180, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v16, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8e4
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v17, vcc_lo, 0x780, v15
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v18, vcc_lo, 0, v16, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8d4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8c4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8b4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x8a4
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x894
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x884
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x874
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[19:22], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v17, vcc_lo, 0x100, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v18, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x864
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v19, vcc_lo, 0x780, v17
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v20, vcc_lo, 0, v18, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x854
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x844
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x834
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x824
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x814
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x804
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[21:24], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:2036 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v19, vcc_lo, 0x80, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v20, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x780, v19
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v20, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:2020 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:2004 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1988 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1972 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1956 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1940 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1924 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1908 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x780, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1892 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1876 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1860 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1844 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1828 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1812 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1796 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1780 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x700, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1764 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1748 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1732 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1716 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1700 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1684 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1668 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1652 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x680, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1636 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1620 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1604 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1588 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1572 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1556 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1540 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1524 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x600, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1508 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1492 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1476 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1460 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1444 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1428 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1412 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1396 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v21, vcc_lo, 0x580, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v4, vcc_lo, 0x500, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, 0, v6, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1380 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1364 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1348 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1332 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1316 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1300 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[23:26], off, off offset:1284 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[21:22], v[23:26], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1268 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1252 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1236 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1220 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1204 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1188 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1172 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1156 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[21:24], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1140 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1124 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1108 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1092 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1076 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1060 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1044 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1028 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[21:24], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:1012 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:996 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:980 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:964 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:948 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:932 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:916 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[21:24], off, off offset:900 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[2:3], v[21:24], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:884 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:868 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:852 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:836 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:820 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:804 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:788 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:772 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[7:8], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:756 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:740 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:724 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:708 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:692 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:676 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:660 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:644 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[9:10], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:628 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:612 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:596 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:580 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:564 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:548 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:532 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:516 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[11:12], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:500 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:484 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:468 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:452 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:436 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:420 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:404 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:388 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[13:14], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:372 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:356 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:340 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:324 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:308 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:292 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:276 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:260 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[15:16], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:244 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:228 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:212 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:196 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:180 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:164 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:148 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:132 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[17:18], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:116 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:2032
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:100 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:2016
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:84 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:2000
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:68 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:1984
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:52 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:1968
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:36 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:1952
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:20 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:1936
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, off offset:4 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[19:20], v[0:3], off offset:1920
+; GFX10-FLATSCR-NEXT:    s_endpgm
 entry:
   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
   %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
@@ -45,79 +10073,1028 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_limited_sgpr
-; GFX6: %bb.1:
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: buffer_store_dword [[SPILL_REG_0:v[0-9]+]]
-; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_0]]
-; GFX6: v_mov_b32_e32 [[OFFSET_REG0:v[0-9]+]], 0x[[OFFSET0:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[SPILL_REG_0]], [[OFFSET_REG0]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6: buffer_load_dword [[SPILL_REG_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec, s
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: v_mov_b32_e32 [[RELOAD_OFFSET_REG0:v[0-9]+]], 0x[[RELOAD_OFFSET0:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[RELOAD_REG_0:v[0-9]+]], off,
-; GFX6: buffer_load_dword [[RELOAD_REG_0]], [[RELOAD_OFFSET_REG0]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6-COUNT-8: v_readlane_b32 s{{[0-9]+}}, [[RELOAD_REG_0]]
-; GFX6: buffer_load_dword [[RELOAD_REG_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec,
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: buffer_store_dword [[SPILL_REG_1:v[0-9]+]]
-; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_1]]
-; GFX6: v_mov_b32_e32 [[OFFSET_REG1:v[0-9]+]], 0x[[OFFSET1:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[SPILL_REG_1]], [[OFFSET_REG1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6: buffer_load_dword [[SPILL_REG_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec, s
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: v_mov_b32_e32 [[RELOAD_OFFSET_REG1:v[0-9]+]], 0x[[RELOAD_OFFSET1:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[RELOAD_REG_1:v[0-9]+]], off,
-; GFX6: buffer_load_dword [[RELOAD_REG_1]], [[RELOAD_OFFSET_REG1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6-COUNT-8: v_readlane_b32 s{{[0-9]+}}, [[RELOAD_REG_1]]
-; GFX6: buffer_load_dword [[RELOAD_REG_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec,
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: buffer_store_dword [[SPILL_REG_2:v[0-9]+]]
-; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_2]]
-; GFX6: v_mov_b32_e32 [[OFFSET_REG2:v[0-9]+]], 0x[[OFFSET2:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[SPILL_REG_2]], [[OFFSET_REG2]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6: buffer_load_dword [[SPILL_REG_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec, s
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: buffer_store_dword [[SPILL_REG_3:v[0-9]+]]
-; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_3]]
-; GFX6: v_mov_b32_e32 [[OFFSET_REG3:v[0-9]+]], 0x[[OFFSET3:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[SPILL_REG_3]], [[OFFSET_REG3]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6: buffer_load_dword [[SPILL_REG_3]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec, s
-
-
-; GFX6: s_mov_b64 exec, 0xff
-; GFX6: buffer_store_dword [[SPILL_REG_4:v[0-9]+]]
-; GFX6-COUNT-4: v_writelane_b32 [[SPILL_REG_4]]
-; GFX6: v_mov_b32_e32 [[OFFSET_REG4:v[0-9]+]], 0x[[OFFSET4:[0-9a-f]+]]
-; GFX6: buffer_store_dword [[SPILL_REG_4]], [[OFFSET_REG4]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX6: buffer_load_dword [[SPILL_REG_4]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX6: s_mov_b64 exec, s
-
-; GFX6: NumSgprs: 48
-; GFX6: ScratchSize: 8608
-
-; FLATSCR:           s_movk_i32 [[SOFF1:s[0-9]+]], 0x
-; GFX9-FLATSCR:      s_waitcnt vmcnt(0)
-; FLATSCR:           scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill
-; FLATSCR:           s_movk_i32 [[SOFF2:s[0-9]+]], 0x
-; FLATSCR:           scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload
 define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64 x i32> addrspace(1)* %in) #0 {
+; GFX6-LABEL: test_limited_sgpr:
+; GFX6:       ; %bb.0: ; %entry
+; GFX6-NEXT:    s_mov_b32 s40, SCRATCH_RSRC_DWORD0
+; GFX6-NEXT:    s_mov_b32 s41, SCRATCH_RSRC_DWORD1
+; GFX6-NEXT:    s_mov_b32 s42, -1
+; GFX6-NEXT:    s_mov_b32 s43, 0xe8f000
+; GFX6-NEXT:    s_add_u32 s40, s40, s3
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX6-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
+; GFX6-NEXT:    v_mbcnt_hi_u32_b32_e32 v5, -1, v0
+; GFX6-NEXT:    v_mov_b32_e32 v6, 0
+; GFX6-NEXT:    s_mov_b32 s38, 0
+; GFX6-NEXT:    s_mov_b32 s39, 0xf000
+; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6-NEXT:    s_mov_b64 s[36:37], s[2:3]
+; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 8, v5
+; GFX6-NEXT:    v_mov_b32_e32 v8, v6
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:240
+; GFX6-NEXT:    s_addc_u32 s41, s41, 0
+; GFX6-NEXT:    s_mov_b32 s2, 0x83400
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:224
+; GFX6-NEXT:    s_mov_b32 s2, 0x83000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:208
+; GFX6-NEXT:    s_mov_b32 s2, 0x82c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:192
+; GFX6-NEXT:    s_mov_b32 s2, 0x82800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:176
+; GFX6-NEXT:    s_mov_b32 s2, 0x82400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:160
+; GFX6-NEXT:    s_mov_b32 s2, 0x82000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:144
+; GFX6-NEXT:    s_mov_b32 s2, 0x81c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:128
+; GFX6-NEXT:    s_mov_b32 s2, 0x81800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:112
+; GFX6-NEXT:    s_mov_b32 s2, 0x81400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:96
+; GFX6-NEXT:    s_mov_b32 s2, 0x81000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:80
+; GFX6-NEXT:    s_mov_b32 s2, 0x80c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:64
+; GFX6-NEXT:    s_mov_b32 s2, 0x80400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64
+; GFX6-NEXT:    buffer_load_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:16
+; GFX6-NEXT:    s_mov_b32 s2, 0x80800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v11, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v12, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_load_dwordx4 v[13:16], v[7:8], s[36:39], 0 addr64 offset:32
+; GFX6-NEXT:    buffer_load_dwordx4 v[17:20], v[7:8], s[36:39], 0 addr64 offset:48
+; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 13, v0
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 16, v4
+; GFX6-NEXT:    v_mov_b32_e32 v7, 1
+; GFX6-NEXT:    buffer_store_dword v7, v4, s[40:43], 0 offen
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[4:11]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v4, s4, 0
+; GFX6-NEXT:    v_writelane_b32 v4, s5, 1
+; GFX6-NEXT:    v_writelane_b32 v4, s6, 2
+; GFX6-NEXT:    v_writelane_b32 v4, s7, 3
+; GFX6-NEXT:    v_writelane_b32 v4, s8, 4
+; GFX6-NEXT:    v_writelane_b32 v4, s9, 5
+; GFX6-NEXT:    v_writelane_b32 v4, s10, 6
+; GFX6-NEXT:    v_writelane_b32 v4, s11, 7
+; GFX6-NEXT:    s_mov_b32 s2, 0x83800
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[8:15]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[16:23]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[24:31]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[4:7]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[2:3]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s[36:37]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; def s33
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    s_and_saveexec_b64 s[34:35], vcc
+; GFX6-NEXT:    s_cbranch_execz .LBB1_2
+; GFX6-NEXT:  ; %bb.1: ; %bb0
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v9, s8, 0
+; GFX6-NEXT:    v_writelane_b32 v9, s9, 1
+; GFX6-NEXT:    v_writelane_b32 v9, s10, 2
+; GFX6-NEXT:    v_writelane_b32 v9, s11, 3
+; GFX6-NEXT:    v_writelane_b32 v9, s12, 4
+; GFX6-NEXT:    v_writelane_b32 v9, s13, 5
+; GFX6-NEXT:    v_writelane_b32 v9, s14, 6
+; GFX6-NEXT:    v_writelane_b32 v9, s15, 7
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2100
+; GFX6-NEXT:    buffer_store_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x20e0
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s8, v8, 0
+; GFX6-NEXT:    v_readlane_b32 s9, v8, 1
+; GFX6-NEXT:    v_readlane_b32 s10, v8, 2
+; GFX6-NEXT:    v_readlane_b32 s11, v8, 3
+; GFX6-NEXT:    v_readlane_b32 s12, v8, 4
+; GFX6-NEXT:    v_readlane_b32 s13, v8, 5
+; GFX6-NEXT:    v_readlane_b32 s14, v8, 6
+; GFX6-NEXT:    v_readlane_b32 s15, v8, 7
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v7, s16, 0
+; GFX6-NEXT:    v_writelane_b32 v7, s17, 1
+; GFX6-NEXT:    v_writelane_b32 v7, s18, 2
+; GFX6-NEXT:    v_writelane_b32 v7, s19, 3
+; GFX6-NEXT:    v_writelane_b32 v7, s20, 4
+; GFX6-NEXT:    v_writelane_b32 v7, s21, 5
+; GFX6-NEXT:    v_writelane_b32 v7, s22, 6
+; GFX6-NEXT:    v_writelane_b32 v7, s23, 7
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2120
+; GFX6-NEXT:    buffer_store_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2100
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s16, v9, 0
+; GFX6-NEXT:    v_readlane_b32 s17, v9, 1
+; GFX6-NEXT:    v_readlane_b32 s18, v9, 2
+; GFX6-NEXT:    v_readlane_b32 s19, v9, 3
+; GFX6-NEXT:    v_readlane_b32 s20, v9, 4
+; GFX6-NEXT:    v_readlane_b32 s21, v9, 5
+; GFX6-NEXT:    v_readlane_b32 s22, v9, 6
+; GFX6-NEXT:    v_readlane_b32 s23, v9, 7
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v8, s24, 0
+; GFX6-NEXT:    v_writelane_b32 v8, s25, 1
+; GFX6-NEXT:    v_writelane_b32 v8, s26, 2
+; GFX6-NEXT:    v_writelane_b32 v8, s27, 3
+; GFX6-NEXT:    v_writelane_b32 v8, s28, 4
+; GFX6-NEXT:    v_writelane_b32 v8, s29, 5
+; GFX6-NEXT:    v_writelane_b32 v8, s30, 6
+; GFX6-NEXT:    v_writelane_b32 v8, s31, 7
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2140
+; GFX6-NEXT:    buffer_store_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2120
+; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s24, v7, 0
+; GFX6-NEXT:    v_readlane_b32 s25, v7, 1
+; GFX6-NEXT:    v_readlane_b32 s26, v7, 2
+; GFX6-NEXT:    v_readlane_b32 s27, v7, 3
+; GFX6-NEXT:    v_readlane_b32 s28, v7, 4
+; GFX6-NEXT:    v_readlane_b32 s29, v7, 5
+; GFX6-NEXT:    v_readlane_b32 s30, v7, 6
+; GFX6-NEXT:    v_readlane_b32 s31, v7, 7
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v10, s0, 0
+; GFX6-NEXT:    v_writelane_b32 v10, s1, 1
+; GFX6-NEXT:    v_writelane_b32 v10, s2, 2
+; GFX6-NEXT:    v_writelane_b32 v10, s3, 3
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2160
+; GFX6-NEXT:    buffer_store_dword v10, v4, s[40:43], 0 offen ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v10, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v8, s4, 0
+; GFX6-NEXT:    v_writelane_b32 v8, s5, 1
+; GFX6-NEXT:    v_writelane_b32 v8, s6, 2
+; GFX6-NEXT:    v_writelane_b32 v8, s7, 3
+; GFX6-NEXT:    s_mov_b32 s0, 0x85c00
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], s0 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[0:1], exec
+; GFX6-NEXT:    s_mov_b64 exec, 3
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v9, s2, 0
+; GFX6-NEXT:    v_writelane_b32 v9, s3, 1
+; GFX6-NEXT:    s_mov_b32 s4, 0x86600
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], s4 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[0:1]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 0xff
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2140
+; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s0, v7, 0
+; GFX6-NEXT:    v_readlane_b32 s1, v7, 1
+; GFX6-NEXT:    v_readlane_b32 s2, v7, 2
+; GFX6-NEXT:    v_readlane_b32 s3, v7, 3
+; GFX6-NEXT:    v_readlane_b32 s4, v7, 4
+; GFX6-NEXT:    v_readlane_b32 s5, v7, 5
+; GFX6-NEXT:    v_readlane_b32 s6, v7, 6
+; GFX6-NEXT:    v_readlane_b32 s7, v7, 7
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v8, s36, 0
+; GFX6-NEXT:    v_writelane_b32 v8, s37, 1
+; GFX6-NEXT:    v_writelane_b32 v8, s38, 2
+; GFX6-NEXT:    v_writelane_b32 v8, s39, 3
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2180
+; GFX6-NEXT:    buffer_store_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_mov_b64 s[38:39], exec
+; GFX6-NEXT:    s_mov_b64 exec, 3
+; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v10, s36, 0
+; GFX6-NEXT:    v_writelane_b32 v10, s37, 1
+; GFX6-NEXT:    s_mov_b32 s44, 0x86400
+; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], s44 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v10, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[38:39]
+; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2170
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s36, v9, 0
+; GFX6-NEXT:    v_readlane_b32 s37, v9, 1
+; GFX6-NEXT:    v_readlane_b32 s38, v9, 2
+; GFX6-NEXT:    v_readlane_b32 s39, v9, 3
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2190
+; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2190
+; GFX6-NEXT:    buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s44, v7, 0
+; GFX6-NEXT:    v_readlane_b32 s45, v7, 1
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    s_mov_b64 vcc, s[34:35]
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2198
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0x2198
+; GFX6-NEXT:    buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s34, v8, 0
+; GFX6-NEXT:    v_readlane_b32 s35, v8, 1
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_not_b64 exec, exec
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ; use s[8:15],s[16:23],s[24:31],s[0:7],s[36:39],s[34:35],s[44:45]
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    s_mov_b64 s[34:35], vcc
+; GFX6-NEXT:    s_mov_b64 s[8:9], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    s_mov_b32 s0, 0x86000
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s36, v4, 0
+; GFX6-NEXT:    v_readlane_b32 s37, v4, 1
+; GFX6-NEXT:    v_readlane_b32 s38, v4, 2
+; GFX6-NEXT:    v_readlane_b32 s39, v4, 3
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[8:9]
+; GFX6-NEXT:    s_mov_b64 s[4:5], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    s_mov_b32 s6, 0x85800
+; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s6 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s0, v7, 0
+; GFX6-NEXT:    v_readlane_b32 s1, v7, 1
+; GFX6-NEXT:    v_readlane_b32 s2, v7, 2
+; GFX6-NEXT:    v_readlane_b32 s3, v7, 3
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX6-NEXT:    s_mov_b32 s2, 0x83800
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_mov_b32 s2, 0x84000
+; GFX6-NEXT:    buffer_store_dword v13, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v14, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v15, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v16, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_mov_b32 s2, 0x84800
+; GFX6-NEXT:    buffer_store_dword v17, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v18, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v19, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v20, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    s_mov_b32 s2, 0x84800
+; GFX6-NEXT:    buffer_load_dword v17, off, s[40:43], s2 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v18, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v19, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v20, off, s[40:43], s2 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s2, 0x84000
+; GFX6-NEXT:    buffer_load_dword v13, off, s[40:43], s2 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v14, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v15, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v16, off, s[40:43], s2 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s2, 0x83800
+; GFX6-NEXT:    buffer_load_dword v0, off, s[40:43], s2 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:    ;;#ASMSTART
+; GFX6-NEXT:    ;;#ASMEND
+; GFX6-NEXT:  .LBB1_2: ; %ret
+; GFX6-NEXT:    s_or_b64 exec, exec, s[34:35]
+; GFX6-NEXT:    s_mov_b32 s4, 0x83400
+; GFX6-NEXT:    v_lshl_b64 v[4:5], v[5:6], 8
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GFX6-NEXT:    s_mov_b32 s4, 0x83000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:240
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x82c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:224
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x82800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:208
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x82400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:192
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x82000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:176
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x81c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:160
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x81800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:144
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x81400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:128
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x81000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:112
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x80c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:96
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x80400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:80
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s4, 0x80800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:64
+; GFX6-NEXT:    buffer_store_dwordx4 v[17:20], v[4:5], s[0:3], 0 addr64 offset:48
+; GFX6-NEXT:    buffer_store_dwordx4 v[13:16], v[4:5], s[0:3], 0 addr64 offset:32
+; GFX6-NEXT:    s_waitcnt expcnt(2)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:16
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64
+; GFX6-NEXT:    s_endpgm
+;
+; GFX9-FLATSCR-LABEL: test_limited_sgpr:
+; GFX9-FLATSCR:       ; %bb.0: ; %entry
+; GFX9-FLATSCR-NEXT:    s_load_dwordx4 s[36:39], s[0:1], 0x24
+; GFX9-FLATSCR-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
+; GFX9-FLATSCR-NEXT:    v_mbcnt_hi_u32_b32 v5, -1, v0
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 8, v5
+; GFX9-FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
+; GFX9-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:240
+; GFX9-FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20b0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v7, 1
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v0, s[38:39] offset:224
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:208
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20a0
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v0, s[38:39] offset:192
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:176
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2090
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v0, s[38:39] offset:160
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:144
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2080
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:128
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20c0
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:112
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2060
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:96
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2050
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:80
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2040
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:64
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2030
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:48
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2020
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:32
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2070
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[1:4], v0, s[38:39] offset:16
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2010
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v0, s[38:39]
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v4, 16
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_lshl_add_u32 v4, v0, 13, v4
+; GFX9-FLATSCR-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-FLATSCR-NEXT:    scratch_store_dword v4, v7, off
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[0:7]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[8:15]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[16:23]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[24:31]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[40:43]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[38:39]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s[44:45]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; def s33
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    s_and_saveexec_b64 s[34:35], vcc
+; GFX9-FLATSCR-NEXT:    s_cbranch_execz .LBB1_2
+; GFX9-FLATSCR-NEXT:  ; %bb.1: ; %bb0
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ; use s[0:7],s[8:15],s[16:23],s[24:31],s[40:43],s[38:39],s[44:45]
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20d0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20e0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[16:19], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20f0
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[20:23], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2100
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[8:11], s0 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2100
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[8:11], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20f0
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[20:23], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20e0
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[16:19], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20d0
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX9-FLATSCR-NEXT:    ;;#ASMEND
+; GFX9-FLATSCR-NEXT:  .LBB1_2: ; %ret
+; GFX9-FLATSCR-NEXT:    s_or_b64 exec, exec, s[34:35]
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20b0
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[12:15], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    v_lshlrev_b64 v[4:5], 8, v[5:6]
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, s37
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v4, vcc, s36, v4
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v5, vcc, v6, v5, vcc
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20a0
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[12:15], off offset:240
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[8:11], off offset:224
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2090
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:208
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[20:23], off offset:192
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[20:23], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2080
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[20:23], off offset:176
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[16:19], off offset:160
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[16:19], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x20c0
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2060
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[12:15], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2050
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(2)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[16:19], off offset:144
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(2)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:128
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2040
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(3)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[12:15], off offset:112
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:96
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2030
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:80
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2020
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:64
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2070
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:48
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2010
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:32
+; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off offset:16
+; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX9-FLATSCR-NEXT:    s_endpgm
+;
+; GFX10-FLATSCR-LABEL: test_limited_sgpr:
+; GFX10-FLATSCR:       ; %bb.0: ; %entry
+; GFX10-FLATSCR-NEXT:    s_add_u32 s2, s2, s5
+; GFX10-FLATSCR-NEXT:    s_addc_u32 s3, s3, 0
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-FLATSCR-NEXT:    s_load_dwordx4 s[36:39], s[0:1], 0x24
+; GFX10-FLATSCR-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v6, 0
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v7, 1
+; GFX10-FLATSCR-NEXT:    s_mov_b32 s33, exec_lo
+; GFX10-FLATSCR-NEXT:    v_mbcnt_hi_u32_b32 v5, -1, v0
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v0, 8, v5
+; GFX10-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-NEXT:    s_clause 0xf
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[64:67], v0, s[38:39] offset:240
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[60:63], v0, s[38:39] offset:224
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[56:59], v0, s[38:39] offset:208
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[52:55], v0, s[38:39] offset:192
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[48:51], v0, s[38:39] offset:176
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[44:47], v0, s[38:39] offset:160
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[40:43], v0, s[38:39] offset:144
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[36:39], v0, s[38:39] offset:128
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[32:35], v0, s[38:39] offset:112
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[28:31], v0, s[38:39] offset:96
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[24:27], v0, s[38:39] offset:80
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[20:23], v0, s[38:39] offset:64
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[16:19], v0, s[38:39] offset:48
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[12:15], v0, s[38:39] offset:32
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[8:11], v0, s[38:39] offset:16
+; GFX10-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v0, s[38:39]
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    v_lshl_add_u32 v4, v0, 13, 16
+; GFX10-FLATSCR-NEXT:    scratch_store_dword v4, v7, off
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[0:7]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[8:15]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[16:23]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[24:31]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[40:43]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[34:35]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s[38:39]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; def s44
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    v_cmpx_eq_u32_e32 0, v0
+; GFX10-FLATSCR-NEXT:    s_cbranch_execz .LBB1_2
+; GFX10-FLATSCR-NEXT:  ; %bb.1: ; %bb0
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ; use s[0:7],s[8:15],s[16:23],s[24:31],s[40:43],s[34:35],s[38:39]
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x2010
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v88, v59
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v92, v63
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v87, v58
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v86, v57
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v85, v56
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v91, v62
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v90, v61
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v89, v60
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v60, v35
+; GFX10-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[64:67], s0 ; 16-byte Folded Spill
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v68, v39
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v59, v34
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v58, v33
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v57, v32
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v67, v38
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v66, v37
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v65, v36
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v36, v11
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v72, v43
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v76, v47
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v80, v51
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v84, v55
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, v8
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v71, v42
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v70, v41
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v69, v40
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v40, v15
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v75, v46
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v74, v45
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v73, v44
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v44, v19
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v79, v50
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v78, v49
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v77, v48
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v48, v23
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v83, v54
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v82, v53
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v81, v52
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v52, v27
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v56, v31
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v35, v10
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, v9
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v37, v12
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v41, v16
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v45, v20
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v49, v24
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v53, v28
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v39, v14
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v38, v13
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v43, v18
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v42, v17
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v47, v22
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v46, v21
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v51, v26
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v50, v25
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v55, v30
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v54, v29
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v8, v33
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v28, v53
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v24, v49
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v20, v45
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v16, v41
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v12, v37
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v9, v34
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v10, v35
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v11, v36
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v32, v57
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v29, v54
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v30, v55
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v31, v56
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v25, v50
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v26, v51
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v27, v52
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v21, v46
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v22, v47
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v23, v48
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v17, v42
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v18, v43
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v19, v44
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v13, v38
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v14, v39
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v15, v40
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v33, v58
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v34, v59
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v35, v60
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    s_movk_i32 s0, 0x2010
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v36, v65
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v37, v66
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v38, v67
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v39, v68
+; GFX10-FLATSCR-NEXT:    scratch_load_dwordx4 v[64:67], off, s0 ; 16-byte Folded Reload
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v60, v89
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v56, v85
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v52, v81
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v48, v77
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v44, v73
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v40, v69
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v61, v90
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v62, v91
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v63, v92
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v57, v86
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v58, v87
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v59, v88
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v53, v82
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v54, v83
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v55, v84
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v49, v78
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v50, v79
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v51, v80
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v45, v74
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v46, v75
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v47, v76
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v41, v70
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v42, v71
+; GFX10-FLATSCR-NEXT:    v_mov_b32_e32 v43, v72
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:    ;;#ASMSTART
+; GFX10-FLATSCR-NEXT:    ;;#ASMEND
+; GFX10-FLATSCR-NEXT:  .LBB1_2: ; %ret
+; GFX10-FLATSCR-NEXT:    s_or_b32 exec_lo, exec_lo, s33
+; GFX10-FLATSCR-NEXT:    v_lshlrev_b64 v[4:5], 8, v[5:6]
+; GFX10-FLATSCR-NEXT:    v_add_co_u32 v4, vcc_lo, s36, v4
+; GFX10-FLATSCR-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, s37, v5, vcc_lo
+; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[64:67], off offset:240
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[60:63], off offset:224
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[56:59], off offset:208
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[52:55], off offset:192
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[48:51], off offset:176
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[44:47], off offset:160
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[40:43], off offset:144
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[36:39], off offset:128
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[32:35], off offset:112
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[28:31], off offset:96
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[24:27], off offset:80
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[20:23], off offset:64
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[16:19], off offset:48
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[12:15], off offset:32
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[8:11], off offset:16
+; GFX10-FLATSCR-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off
+; GFX10-FLATSCR-NEXT:    s_endpgm
 entry:
   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
   %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
@@ -173,3 +11150,6 @@ declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
 
 attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
 attributes #1 = { nounwind readnone }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
+; FLATSCR: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
index 69442032f37ec..3914c3b2fe073 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
@@ -48,23 +48,23 @@ body:             |
     ; GFX9-NEXT: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
     ; GFX9-NEXT: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
     ; GFX9-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX9-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc
+    ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GFX9-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX9-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX9-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc
+    ; GFX9-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1
     ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -84,23 +84,23 @@ body:             |
     ; GFX10-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr9, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GFX10-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GFX10-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX10-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX10-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc
+    ; GFX10-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX10-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GFX10-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX10-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX10-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc
+    ; GFX10-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1
     ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
@@ -114,23 +114,23 @@ body:             |
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: $sgpr33 = S_MOV_B32 0
     ; GFX11-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX11-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
+    ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX11-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc
+    ; GFX11-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX11-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2
     ; GFX11-NEXT: $vcc = IMPLICIT_DEF
-    ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
-    ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ; GFX11-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec
+    ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+    ; GFX11-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc
+    ; GFX11-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX11-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
+    ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1
     ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)

diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
index 2dbfd58a3db62..2c4d9f0094470 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
@@ -412,6 +412,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -432,6 +433,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -451,6 +453,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -470,6 +473,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -483,6 +487,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -517,6 +522,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -537,6 +543,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -556,6 +563,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -576,6 +584,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -589,6 +598,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -624,6 +634,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -644,6 +655,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -663,6 +675,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -684,6 +697,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -697,6 +711,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -730,6 +745,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -750,6 +766,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -769,6 +786,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -788,6 +806,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -801,6 +820,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -835,6 +855,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -855,6 +876,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -874,6 +896,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -894,6 +917,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -907,6 +931,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -942,6 +967,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -962,6 +988,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -981,6 +1008,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -1002,6 +1030,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -1015,6 +1044,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:
@@ -1144,6 +1174,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -1164,6 +1195,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -1184,6 +1216,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -1203,6 +1236,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -1217,6 +1251,7 @@ body:             |
     S_CBRANCH_SCC1 %bb.2, implicit $scc
 
   bb.1:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
     S_NOP 0
 
   bb.2:

diff  --git a/llvm/unittests/CodeGen/MFCommon.inc b/llvm/unittests/CodeGen/MFCommon.inc
index 3c8326890cedf..a884f9ebeb54a 100644
--- a/llvm/unittests/CodeGen/MFCommon.inc
+++ b/llvm/unittests/CodeGen/MFCommon.inc
@@ -64,9 +64,12 @@ public:
   Register getFrameRegister(const MachineFunction &MF) const override {
     return 0;
   }
-  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
-                           RegScavenger *RS = nullptr) const override {}
+                           RegScavenger *RS = nullptr) const override {
+    return false;
+
+  }
 };
 
 class BogusSubtarget : public TargetSubtargetInfo {


        


More information about the llvm-commits mailing list