[llvm] [AMDGPU] Improve isBasicBlockPrologue helper function (PR #69924)

Christudasan Devadasan via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 25 11:41:46 PDT 2023


https://github.com/cdevadas updated https://github.com/llvm/llvm-project/pull/69924

>From 9f460bf6069e23d8c97c069d58739b00d5dd53f6 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Mon, 23 Oct 2023 13:29:17 +0530
Subject: [PATCH 1/2] [AMDGPU] Add pseudo instructions for SGPR spill to VGPR

For a future patch, is it important to keep the lowered SGPR
spills to be recognized as spill instructions during regalloc.
Directly lowering them into V_WRITELANE/V_READLANE won't allow
us to attach the SPILL flag to their instructions.

This patch introduces the pseudo instructions with the SGPRSpill
flag set in their Desc. They will get lowered to equivalent
instructions later during post RA pseudo expansion.
---
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp    |  13 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  14 +-
 llvm/lib/Target/AMDGPU/SIInstructions.td      |  17 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |  12 +-
 .../AMDGPU/av_spill_cross_bb_usage.mir        |   8 +-
 .../AMDGPU/csr-sgpr-spill-live-ins.mir        |   8 +-
 .../AMDGPU/extend-wwm-virt-reg-liveness.mir   |  16 +-
 .../AMDGPU/pei-scavenge-sgpr-carry-out.mir    |   4 +-
 .../CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir |   9 +-
 .../test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir |   4 +-
 .../AMDGPU/pei-scavenge-vgpr-spill.mir        |  14 +-
 .../AMDGPU/preserve-only-inactive-lane.mir    |   8 +-
 .../CodeGen/AMDGPU/same-slot-agpr-sgpr.mir    |   8 +-
 .../sgpr-spill-dead-frame-in-dbg-value.mir    |   8 +-
 .../AMDGPU/sgpr-spill-partially-undef.mir     |   8 +-
 .../AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir |  32 +-
 .../AMDGPU/sgpr-spill-vmem-large-frame.mir    |   4 +-
 llvm/test/CodeGen/AMDGPU/sgpr-spill.mir       | 886 +++++++++---------
 .../CodeGen/AMDGPU/si-lower-sgpr-spills.mir   |  16 +-
 .../AMDGPU/snippet-copy-bundle-regression.mir |  48 +-
 .../AMDGPU/spill-reg-tuple-super-reg-use.mir  |  16 +-
 .../AMDGPU/spill-sgpr-csr-live-ins.mir        |   2 +-
 .../AMDGPU/spill-sgpr-to-virtual-vgpr.mir     | 212 ++---
 .../CodeGen/AMDGPU/spill-special-sgpr.mir     |  38 +-
 .../CodeGen/AMDGPU/spill-writelane-vgprs.ll   |   8 -
 llvm/test/CodeGen/AMDGPU/spill192.mir         |  26 +-
 llvm/test/CodeGen/AMDGPU/spill224.mir         |  30 +-
 llvm/test/CodeGen/AMDGPU/spill288.mir         |  38 +-
 llvm/test/CodeGen/AMDGPU/spill320.mir         |  42 +-
 llvm/test/CodeGen/AMDGPU/spill352.mir         |  46 +-
 llvm/test/CodeGen/AMDGPU/spill384.mir         |  50 +-
 ...d-op-for-wwm-scratch-reg-spill-restore.mir |  24 +-
 .../AMDGPU/track-spilled-vgpr-liveness.mir    |  18 -
 .../CodeGen/AMDGPU/use_restore_frame_reg.mir  |   9 +-
 34 files changed, 860 insertions(+), 836 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 5de993ec3cba1d8..350e2f1e3b98712 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -274,7 +274,8 @@ class PrologEpilogSGPRSpillBuilder {
       Register SubReg = NumSubRegs == 1
                             ? SuperReg
                             : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
-      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[I].VGPR)
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
+              Spill[I].VGPR)
           .addReg(SubReg)
           .addImm(Spill[I].Lane)
           .addReg(Spill[I].VGPR, RegState::Undef);
@@ -319,7 +320,7 @@ class PrologEpilogSGPRSpillBuilder {
       Register SubReg = NumSubRegs == 1
                             ? SuperReg
                             : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
-      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
           .addReg(Spill[I].VGPR)
           .addImm(Spill[I].Lane);
     }
@@ -1554,12 +1555,10 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
       // TODO: Handle this elsewhere at an early point. Walking through all MBBs
       // here would be a bad heuristic. A better way should be by calling
       // allocateWWMSpill during the regalloc pipeline whenever a physical
-      // register is allocated for the intended virtual registers. That will
-      // also help excluding the general use of WRITELANE/READLANE intrinsics
-      // that won't really need any such special handling.
-      if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32)
+      // register is allocated for the intended virtual registers.
+      if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
         MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
-      else if (MI.getOpcode() == AMDGPU::V_READLANE_B32)
+      else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
         MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
       else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
         NeedExecCopyReservedReg = true;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 827c2c156638468..83690fcee30958d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2090,6 +2090,14 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
     break;
 
+  case AMDGPU::SI_SPILL_S32_TO_VGPR:
+    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
+    break;
+
+  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
+    MI.setDesc(get(AMDGPU::V_READLANE_B32));
+    break;
+
   case AMDGPU::V_MOV_B64_PSEUDO: {
     Register Dst = MI.getOperand(0).getReg();
     Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -4300,7 +4308,9 @@ static bool shouldReadExec(const MachineInstr &MI) {
   if (SIInstrInfo::isVALU(MI)) {
     switch (MI.getOpcode()) {
     case AMDGPU::V_READLANE_B32:
+    case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
     case AMDGPU::V_WRITELANE_B32:
+    case AMDGPU::SI_SPILL_S32_TO_VGPR:
       return false;
     }
 
@@ -8969,7 +8979,9 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
     return InstructionUniformity::NeverUniform;
 
   unsigned opcode = MI.getOpcode();
-  if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
+  if (opcode == AMDGPU::V_READLANE_B32 ||
+      opcode == AMDGPU::V_READFIRSTLANE_B32 ||
+      opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
     return InstructionUniformity::AlwaysUniform;
 
   if (isCopyInstr(MI)) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 567f1b812c1808c..8715ee1893d58a7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -875,6 +875,23 @@ defm SI_SPILL_S384 : SI_SPILL_SGPR <SReg_384>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
 defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
 
+let SGPRSpill = 1, VALU = 1, isConvergent = 1 in {
+def SI_SPILL_S32_TO_VGPR : PseudoInstSI <(outs VGPR_32:$vdst),
+  (ins SReg_32:$src0, i32imm:$src1, VGPR_32:$vdst_in)> {
+  let hasSideEffects = 0;
+  let mayLoad = 0;
+  let mayStore = 0;
+  let Constraints = "$vdst = $vdst_in";
+}
+
+def SI_RESTORE_S32_FROM_VGPR : PseudoInstSI <(outs SReg_32:$sdst),
+  (ins VGPR_32:$src0, i32imm:$src1)> {
+  let hasSideEffects = 0;
+  let mayLoad = 0;
+  let mayStore = 0;
+}
+} // End SGPRSpill = 1, VALU = 1, isConvergent = 1
+
 // VGPR or AGPR spill instructions. In case of AGPR spilling a temp register
 // needs to be used and an extra instruction to move between VGPR and AGPR.
 // UsesTmp adds to the total size of an expanded spill in this case.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index d0a81673d6528c2..d22056e372c2d88 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1769,7 +1769,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
       // Mark the "old value of vgpr" input undef only if this is the first sgpr
       // spill to this specific vgpr in the first basic block.
       auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
-                         SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
+                         SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
                      .addReg(SubReg, getKillRegState(UseKill))
                      .addImm(Spill.Lane)
                      .addReg(Spill.VGPR);
@@ -1815,8 +1815,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
                 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
 
         MachineInstrBuilder WriteLane =
-            BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
-                    SB.TmpVGPR)
+            BuildMI(*SB.MBB, MI, SB.DL,
+                    SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
                 .addReg(SubReg, SubKillState)
                 .addImm(i % PVD.PerVGPR)
                 .addReg(SB.TmpVGPR, TmpVGPRFlags);
@@ -1877,8 +1877,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
 
       SpilledReg Spill = VGPRSpills[i];
-      auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
-                         SubReg)
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
+                         SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
                      .addReg(Spill.VGPR)
                      .addImm(Spill.Lane);
       if (SB.NumSubRegs > 1 && i == 0)
@@ -1911,7 +1911,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
 
         bool LastSubReg = (i + 1 == e);
         auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
-                           SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
+                           SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
                        .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
                        .addImm(i);
         if (SB.NumSubRegs > 1 && i == 0)
diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
index 7209d160e6c8a7a..65648bacd556793 100644
--- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
@@ -60,8 +60,8 @@ body:             |
   ; GCN-NEXT:   renamable $vgpr46 = COPY $vgpr1, implicit $exec
   ; GCN-NEXT:   renamable $vgpr45 = COPY $vgpr0, implicit $exec
   ; GCN-NEXT:   renamable $sgpr16_sgpr17 = IMPLICIT_DEF
-  ; GCN-NEXT:   $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
-  ; GCN-NEXT:   $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
+  ; GCN-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
+  ; GCN-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
   ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5)
   ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5)
   ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.2, addrspace 5)
@@ -124,8 +124,8 @@ body:             |
 
     ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
     renamable $sgpr16_sgpr17 = IMPLICIT_DEF
-    $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
-    $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
+    $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
+    $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
     dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
     %8:vreg_64 = nofpexcept V_FMA_F64_e64 0, %7, 0, %6, 0, %5, 0, 0, implicit $mode, implicit $exec
     ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
index aed642d1f0670e1..4085cf2501b44ce 100644
--- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
+++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
@@ -19,10 +19,10 @@ body: |
   ; CHECK-NEXT:   $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
   ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr42, 0, $vgpr0
-  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr43, 1, $vgpr0
-  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr46, 2, $vgpr0
-  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr47, 3, $vgpr0
+  ; CHECK-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr0
+  ; CHECK-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr43, 1, $vgpr0
+  ; CHECK-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr46, 2, $vgpr0
+  ; CHECK-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr47, 3, $vgpr0
   ; CHECK-NEXT:   S_NOP 0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
index f802e1b1e18af5d..3bf7e7b8c569605 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
@@ -26,9 +26,9 @@ body:             |
     ; GCN: liveins: $sgpr4, $vgpr2_vgpr3
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
-    ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
     ; GCN-NEXT: S_NOP 0
-    ; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
+    ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
     ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
     ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
     ; GCN-NEXT: KILL killed renamable $vgpr0
@@ -77,9 +77,9 @@ body:             |
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
   ; GCN-NEXT:   liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr0 = V_WRITELANE_B32 $sgpr6, 0, killed $vgpr0
+  ; GCN-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr0
   ; GCN-NEXT:   S_NOP 0
-  ; GCN-NEXT:   $sgpr6 = V_READLANE_B32 $vgpr0, 0
+  ; GCN-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
   ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
   ; GCN-NEXT:   S_BRANCH %bb.3
   ; GCN-NEXT: {{  $}}
@@ -143,9 +143,9 @@ body:             |
   ; GCN-NEXT:   successors: %bb.2(0x80000000)
   ; GCN-NEXT:   liveins: $sgpr4, $vgpr0, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
+  ; GCN-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
   ; GCN-NEXT:   S_NOP 0
-  ; GCN-NEXT:   $sgpr4 = V_READLANE_B32 $vgpr0, 0
+  ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
   ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
   ; GCN-NEXT:   S_BRANCH %bb.2
   ; GCN-NEXT: {{  $}}
@@ -245,9 +245,9 @@ body:             |
   ; GCN-NEXT: bb.1:
   ; GCN-NEXT:   liveins: $sgpr4, $vgpr0, $vgpr2_vgpr3
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
+  ; GCN-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
   ; GCN-NEXT:   S_NOP 0
-  ; GCN-NEXT:   $sgpr4 = V_READLANE_B32 $vgpr0, 0
+  ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
   ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
   ; GCN-NEXT:   GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
   ; GCN-NEXT:   KILL killed renamable $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
index 33e766ad3bf9e18..a20486617075946 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
@@ -36,7 +36,7 @@ body:             |
     ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
     ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
-    ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
+    ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
     ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
     ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
@@ -50,7 +50,7 @@ body:             |
     ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc
     ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
     ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
-    ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
+    ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
     ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
     ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
index 8cffede47704524..a4104737d974fb5 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
@@ -32,7 +32,7 @@ body:             |
     ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
     ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
     ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
-    ; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
+    ; MUBUF-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
     ; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
     ; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
@@ -40,7 +40,7 @@ body:             |
     ; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
     ; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec
     ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
-    ; MUBUF-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
+    ; MUBUF-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
     ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
     ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
@@ -48,6 +48,7 @@ body:             |
     ; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
     ; MUBUF-NEXT: $sgpr33 = COPY $sgpr4
     ; MUBUF-NEXT: S_ENDPGM 0, implicit $vcc
+    ;
     ; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs
     ; FLATSCR: liveins: $vgpr1, $vgpr2
     ; FLATSCR-NEXT: {{  $}}
@@ -58,7 +59,7 @@ body:             |
     ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
     ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
     ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
-    ; FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
+    ; FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
     ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
     ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
@@ -67,7 +68,7 @@ body:             |
     ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
     ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
     ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, -16384, implicit-def $scc
-    ; FLATSCR-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
+    ; FLATSCR-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
     ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
     ; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
index 934c3b11c1ac936..45e95d133e1bb42 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
@@ -31,7 +31,7 @@ body:             |
     ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 262400, implicit-def dead $scc
     ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
-    ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
+    ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 786432, implicit-def dead $scc
     ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
     ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
@@ -40,7 +40,7 @@ body:             |
     ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -4096, implicit-def $scc
     ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
     ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
-    ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
+    ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
     ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 262400, implicit-def dead $scc
     ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
index 808e61f517a11a3..9462d01ba758dfd 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
@@ -33,7 +33,7 @@ body:             |
     ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
     ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
     ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
-    ; GFX8-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
+    ; GFX8-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
     ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
     ; GFX8-NEXT: $vcc_lo = S_MOV_B32 8192
@@ -42,7 +42,7 @@ body:             |
     ; GFX8-NEXT: $vcc_lo = S_MOV_B32 16384
     ; GFX8-NEXT: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec
     ; GFX8-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
-    ; GFX8-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
+    ; GFX8-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
     ; GFX8-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
     ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
@@ -50,6 +50,7 @@ body:             |
     ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
     ; GFX8-NEXT: $sgpr33 = COPY $sgpr4
     ; GFX8-NEXT: S_ENDPGM 0, amdgpu_allvgprs
+    ;
     ; GFX9-LABEL: name: pei_scavenge_vgpr_spill
     ; GFX9: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
     ; GFX9-NEXT: {{  $}}
@@ -60,14 +61,14 @@ body:             |
     ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
-    ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
+    ; GFX9-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; GFX9-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
     ; GFX9-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
     ; GFX9-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
     ; GFX9-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
     ; GFX9-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec
     ; GFX9-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
-    ; GFX9-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
+    ; GFX9-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
     ; GFX9-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
     ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
@@ -75,6 +76,7 @@ body:             |
     ; GFX9-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
     ; GFX9-NEXT: $sgpr33 = COPY $sgpr4
     ; GFX9-NEXT: S_ENDPGM 0, amdgpu_allvgprs
+    ;
     ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
     ; GFX9-FLATSCR: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
     ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -85,13 +87,13 @@ body:             |
     ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
     ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
     ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
-    ; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
+    ; GFX9-FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
     ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
     ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
     ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
     ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr4, $vgpr1, implicit $exec
-    ; GFX9-FLATSCR-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
+    ; GFX9-FLATSCR-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
     ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
     ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir
index 2f36bba5b75b985..4571e792c7cb587 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir
+++ b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir
@@ -23,17 +23,17 @@ body:             |
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr35, 0, killed $vgpr0
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
     ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
-    ; GCN-NEXT: $sgpr35 = V_READLANE_B32 $vgpr0, 0
+    ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
     ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: S_SETPC_B64_return killed renamable $sgpr30_sgpr31, implicit $vgpr0
-    renamable $vgpr0 = V_WRITELANE_B32 $sgpr35, 0, killed $vgpr0
+    renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
     $sgpr35 = S_MOV_B32 5
-    $sgpr35 = V_READLANE_B32 $vgpr0, 0
+    $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
     renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
     S_SETPC_B64_return killed renamable $sgpr30_sgpr31, implicit $vgpr0
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
index b323d880c9827ad..592e0f0cf0c2447 100644
--- a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
@@ -21,8 +21,8 @@ body: |
     ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
     ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
+    ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5
+    ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
     ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0
@@ -54,8 +54,8 @@ body: |
     ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
     ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
+    ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5
+    ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
     ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
index 341fe2d9b453fab..a7c841d54f7c97e 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
@@ -46,11 +46,11 @@ body:             |
   ; SGPR_SPILL-NEXT: {{  $}}
   ; SGPR_SPILL-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; SGPR_SPILL-NEXT:   renamable $sgpr10 = IMPLICIT_DEF
-  ; SGPR_SPILL-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[V_WRITELANE_B32_]]
+  ; SGPR_SPILL-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[V_WRITELANE_B32_]]
   ; SGPR_SPILL-NEXT:   DBG_VALUE $noreg, 0
   ; SGPR_SPILL-NEXT: {{  $}}
   ; SGPR_SPILL-NEXT: bb.1:
-  ; SGPR_SPILL-NEXT:   $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_]], 0
+  ; SGPR_SPILL-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[V_WRITELANE_B32_]], 0
   ; SGPR_SPILL-NEXT:   KILL [[V_WRITELANE_B32_]]
   ; SGPR_SPILL-NEXT:   S_ENDPGM 0
   ; PEI-LABEL: name: test
@@ -59,12 +59,12 @@ body:             |
   ; PEI-NEXT: {{  $}}
   ; PEI-NEXT:   renamable $vgpr0 = IMPLICIT_DEF
   ; PEI-NEXT:   renamable $sgpr10 = IMPLICIT_DEF
-  ; PEI-NEXT:   renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 0, killed $vgpr0
+  ; PEI-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, killed $vgpr0
   ; PEI-NEXT: {{  $}}
   ; PEI-NEXT: bb.1:
   ; PEI-NEXT:   liveins: $vgpr0
   ; PEI-NEXT: {{  $}}
-  ; PEI-NEXT:   $sgpr10 = V_READLANE_B32 $vgpr0, 0
+  ; PEI-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
   ; PEI-NEXT:   KILL killed renamable $vgpr0
   ; PEI-NEXT:   S_ENDPGM 0
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
index 26a5eedc3eca3dc..055e859f5030085 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
@@ -23,8 +23,8 @@ body:             |
     ; CHECK: liveins: $sgpr4
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; CHECK-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
-    ; CHECK-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]], implicit $sgpr4_sgpr5
     SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
 
 ...
@@ -49,8 +49,8 @@ body:             |
     ; CHECK: liveins: $sgpr5
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; CHECK-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
-    ; CHECK-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]], implicit $sgpr4_sgpr5
     SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
 
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
index 797231b3412a8f8..2c4b7a22facf436 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
@@ -27,7 +27,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1, implicit-def $vgpr0
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0
+  ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
@@ -77,8 +77,8 @@ body:             |
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 3, implicit-def $vgpr0
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0, implicit $sgpr8_sgpr9
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr9, 1, $vgpr0, implicit $sgpr8_sgpr9
+  ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0, implicit $sgpr8_sgpr9
+  ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr9, 1, $vgpr0, implicit $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
@@ -128,7 +128,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $sgpr8 = V_READLANE_B32 killed $vgpr0, 0
+  ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr0
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
@@ -176,8 +176,8 @@ body:             |
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $sgpr8 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr8_sgpr9
-  ; VMEM-GFX8-NEXT:   $sgpr9 = V_READLANE_B32 killed $vgpr0, 1
+  ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr8_sgpr9
+  ; VMEM-GFX8-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr0
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
@@ -224,7 +224,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0
+  ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
@@ -280,7 +280,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $sgpr8 = V_READLANE_B32 killed $vgpr0, 0
+  ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
@@ -333,8 +333,8 @@ body:             |
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 3
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0, implicit $sgpr8_sgpr9
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr9, 1, $vgpr0, implicit $sgpr8_sgpr9
+  ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0, implicit $sgpr8_sgpr9
+  ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr9, 1, $vgpr0, implicit $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
@@ -390,8 +390,8 @@ body:             |
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $sgpr8 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr8_sgpr9
-  ; VMEM-GFX8-NEXT:   $sgpr9 = V_READLANE_B32 killed $vgpr0, 1
+  ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr8_sgpr9
+  ; VMEM-GFX8-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
@@ -447,7 +447,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0
+  ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
@@ -457,7 +457,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr9, 0, undef $vgpr0
+  ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr9, 0, undef $vgpr0
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
@@ -514,7 +514,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $sgpr8 = V_READLANE_B32 killed $vgpr0, 0
+  ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
@@ -524,7 +524,7 @@ body:             |
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 16392, implicit $exec
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $sgpr9 = V_READLANE_B32 killed $vgpr0, 0
+  ; VMEM-GFX8-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
index c44892baca366ed..cac9c85130a7b4d 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
@@ -29,7 +29,7 @@ body:             |
     ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
     ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
     ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr10, 0, undef $vgpr1
+    ; CHECK-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr10, 0, undef $vgpr1
     ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
@@ -37,7 +37,7 @@ body:             |
     ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
     ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
     ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; CHECK-NEXT: $sgpr10 = V_READLANE_B32 killed $vgpr1, 0
+    ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR killed $vgpr1, 0
     ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $scc
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
index a3fabfffde088b5..059eb6dad2c3130 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
@@ -69,7 +69,7 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 0, undef $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -77,7 +77,7 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -85,8 +85,8 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -94,8 +94,8 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -103,9 +103,9 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -113,10 +113,10 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -124,11 +124,11 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -136,14 +136,14 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -151,22 +151,22 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -174,38 +174,38 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -213,11 +213,12 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
+    ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def dead $scc
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, align 4096, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ;
     ; GCN32-MUBUF-LABEL: name: check_spill
     ; GCN32-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11
     ; GCN32-MUBUF-NEXT: {{  $}}
@@ -232,7 +233,7 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 0, undef $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
@@ -240,7 +241,7 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
@@ -248,8 +249,8 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
@@ -257,8 +258,8 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
@@ -266,9 +267,9 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
@@ -276,10 +277,10 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
@@ -287,11 +288,11 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
@@ -299,14 +300,14 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
@@ -314,22 +315,22 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
@@ -337,38 +338,38 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
@@ -376,11 +377,12 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
+    ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def dead $scc
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, align 4096, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ;
     ; GCN64-FLATSCR-LABEL: name: check_spill
     ; GCN64-FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1
     ; GCN64-FLATSCR-NEXT: {{  $}}
@@ -391,7 +393,7 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 0, undef $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -399,7 +401,7 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -407,8 +409,8 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -416,8 +418,8 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -425,9 +427,9 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -435,10 +437,10 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -446,11 +448,11 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -458,14 +460,14 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -473,22 +475,22 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -496,38 +498,38 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -535,7 +537,7 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, align 4096, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
@@ -626,119 +628,119 @@ body:             |
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
+    ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1
+    ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13
+    ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2
+    ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
+    ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 2
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3
+    ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN64-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 3
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4
+    ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN64-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN64-MUBUF-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 4
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7
+    ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN64-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN64-MUBUF-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 4
+    ; GCN64-MUBUF-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
+    ; GCN64-MUBUF-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
+    ; GCN64-MUBUF-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 7
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 $vgpr0, 7
-    ; GCN64-MUBUF-NEXT: $sgpr20 = V_READLANE_B32 $vgpr0, 8
-    ; GCN64-MUBUF-NEXT: $sgpr21 = V_READLANE_B32 $vgpr0, 9
-    ; GCN64-MUBUF-NEXT: $sgpr22 = V_READLANE_B32 $vgpr0, 10
-    ; GCN64-MUBUF-NEXT: $sgpr23 = V_READLANE_B32 $vgpr0, 11
-    ; GCN64-MUBUF-NEXT: $sgpr24 = V_READLANE_B32 $vgpr0, 12
-    ; GCN64-MUBUF-NEXT: $sgpr25 = V_READLANE_B32 $vgpr0, 13
-    ; GCN64-MUBUF-NEXT: $sgpr26 = V_READLANE_B32 $vgpr0, 14
-    ; GCN64-MUBUF-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15
+    ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN64-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN64-MUBUF-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 4
+    ; GCN64-MUBUF-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
+    ; GCN64-MUBUF-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
+    ; GCN64-MUBUF-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 7
+    ; GCN64-MUBUF-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 8
+    ; GCN64-MUBUF-NEXT: $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 9
+    ; GCN64-MUBUF-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 10
+    ; GCN64-MUBUF-NEXT: $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 11
+    ; GCN64-MUBUF-NEXT: $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 12
+    ; GCN64-MUBUF-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
+    ; GCN64-MUBUF-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
+    ; GCN64-MUBUF-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 15
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7
-    ; GCN64-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8
-    ; GCN64-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9
-    ; GCN64-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10
-    ; GCN64-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11
-    ; GCN64-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12
-    ; GCN64-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13
-    ; GCN64-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14
-    ; GCN64-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15
-    ; GCN64-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16
-    ; GCN64-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17
-    ; GCN64-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18
-    ; GCN64-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19
-    ; GCN64-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20
-    ; GCN64-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21
-    ; GCN64-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22
-    ; GCN64-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23
-    ; GCN64-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24
-    ; GCN64-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25
-    ; GCN64-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26
-    ; GCN64-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27
-    ; GCN64-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28
-    ; GCN64-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29
-    ; GCN64-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30
-    ; GCN64-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31
+    ; GCN64-MUBUF-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-MUBUF-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-MUBUF-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN64-MUBUF-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN64-MUBUF-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 4
+    ; GCN64-MUBUF-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
+    ; GCN64-MUBUF-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
+    ; GCN64-MUBUF-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 7
+    ; GCN64-MUBUF-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 8
+    ; GCN64-MUBUF-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 9
+    ; GCN64-MUBUF-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 10
+    ; GCN64-MUBUF-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 11
+    ; GCN64-MUBUF-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 12
+    ; GCN64-MUBUF-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
+    ; GCN64-MUBUF-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
+    ; GCN64-MUBUF-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 15
+    ; GCN64-MUBUF-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 16
+    ; GCN64-MUBUF-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 17
+    ; GCN64-MUBUF-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 18
+    ; GCN64-MUBUF-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 19
+    ; GCN64-MUBUF-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 20
+    ; GCN64-MUBUF-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 21
+    ; GCN64-MUBUF-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 22
+    ; GCN64-MUBUF-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 23
+    ; GCN64-MUBUF-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 24
+    ; GCN64-MUBUF-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 25
+    ; GCN64-MUBUF-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 26
+    ; GCN64-MUBUF-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 27
+    ; GCN64-MUBUF-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 28
+    ; GCN64-MUBUF-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 29
+    ; GCN64-MUBUF-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 30
+    ; GCN64-MUBUF-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 31
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
@@ -746,9 +748,10 @@ body:             |
     ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def dead $scc
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, align 4096, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
+    ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
     ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ;
     ; GCN32-MUBUF-LABEL: name: check_reload
     ; GCN32-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11
     ; GCN32-MUBUF-NEXT: {{  $}}
@@ -763,119 +766,119 @@ body:             |
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
+    ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1
+    ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13
+    ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2
+    ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
+    ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 2
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3
+    ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN32-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 3
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4
+    ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN32-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN32-MUBUF-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 4
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN32-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN32-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN32-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7
+    ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN32-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN32-MUBUF-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 4
+    ; GCN32-MUBUF-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
+    ; GCN32-MUBUF-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
+    ; GCN32-MUBUF-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 7
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN32-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN32-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN32-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 $vgpr0, 7
-    ; GCN32-MUBUF-NEXT: $sgpr20 = V_READLANE_B32 $vgpr0, 8
-    ; GCN32-MUBUF-NEXT: $sgpr21 = V_READLANE_B32 $vgpr0, 9
-    ; GCN32-MUBUF-NEXT: $sgpr22 = V_READLANE_B32 $vgpr0, 10
-    ; GCN32-MUBUF-NEXT: $sgpr23 = V_READLANE_B32 $vgpr0, 11
-    ; GCN32-MUBUF-NEXT: $sgpr24 = V_READLANE_B32 $vgpr0, 12
-    ; GCN32-MUBUF-NEXT: $sgpr25 = V_READLANE_B32 $vgpr0, 13
-    ; GCN32-MUBUF-NEXT: $sgpr26 = V_READLANE_B32 $vgpr0, 14
-    ; GCN32-MUBUF-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15
+    ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN32-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN32-MUBUF-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 4
+    ; GCN32-MUBUF-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
+    ; GCN32-MUBUF-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
+    ; GCN32-MUBUF-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 7
+    ; GCN32-MUBUF-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 8
+    ; GCN32-MUBUF-NEXT: $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 9
+    ; GCN32-MUBUF-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 10
+    ; GCN32-MUBUF-NEXT: $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 11
+    ; GCN32-MUBUF-NEXT: $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 12
+    ; GCN32-MUBUF-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
+    ; GCN32-MUBUF-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
+    ; GCN32-MUBUF-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 15
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4
-    ; GCN32-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5
-    ; GCN32-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6
-    ; GCN32-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7
-    ; GCN32-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8
-    ; GCN32-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9
-    ; GCN32-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10
-    ; GCN32-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11
-    ; GCN32-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12
-    ; GCN32-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13
-    ; GCN32-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14
-    ; GCN32-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15
-    ; GCN32-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16
-    ; GCN32-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17
-    ; GCN32-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18
-    ; GCN32-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19
-    ; GCN32-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20
-    ; GCN32-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21
-    ; GCN32-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22
-    ; GCN32-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23
-    ; GCN32-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24
-    ; GCN32-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25
-    ; GCN32-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26
-    ; GCN32-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27
-    ; GCN32-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28
-    ; GCN32-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29
-    ; GCN32-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30
-    ; GCN32-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31
+    ; GCN32-MUBUF-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN32-MUBUF-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN32-MUBUF-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN32-MUBUF-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN32-MUBUF-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 4
+    ; GCN32-MUBUF-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
+    ; GCN32-MUBUF-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
+    ; GCN32-MUBUF-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 7
+    ; GCN32-MUBUF-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 8
+    ; GCN32-MUBUF-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 9
+    ; GCN32-MUBUF-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 10
+    ; GCN32-MUBUF-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 11
+    ; GCN32-MUBUF-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 12
+    ; GCN32-MUBUF-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
+    ; GCN32-MUBUF-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
+    ; GCN32-MUBUF-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 15
+    ; GCN32-MUBUF-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 16
+    ; GCN32-MUBUF-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 17
+    ; GCN32-MUBUF-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 18
+    ; GCN32-MUBUF-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 19
+    ; GCN32-MUBUF-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 20
+    ; GCN32-MUBUF-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 21
+    ; GCN32-MUBUF-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 22
+    ; GCN32-MUBUF-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 23
+    ; GCN32-MUBUF-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 24
+    ; GCN32-MUBUF-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 25
+    ; GCN32-MUBUF-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 26
+    ; GCN32-MUBUF-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 27
+    ; GCN32-MUBUF-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 28
+    ; GCN32-MUBUF-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 29
+    ; GCN32-MUBUF-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 30
+    ; GCN32-MUBUF-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 31
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
@@ -883,9 +886,10 @@ body:             |
     ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def dead $scc
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, align 4096, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
+    ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
     ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
+    ;
     ; GCN64-FLATSCR-LABEL: name: check_reload
     ; GCN64-FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1
     ; GCN64-FLATSCR-NEXT: {{  $}}
@@ -896,119 +900,119 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 2
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 3
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN64-FLATSCR-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 4
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-FLATSCR-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-FLATSCR-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-FLATSCR-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN64-FLATSCR-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 4
+    ; GCN64-FLATSCR-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
+    ; GCN64-FLATSCR-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
+    ; GCN64-FLATSCR-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 7
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.6, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-FLATSCR-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-FLATSCR-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-FLATSCR-NEXT: $sgpr19 = V_READLANE_B32 $vgpr0, 7
-    ; GCN64-FLATSCR-NEXT: $sgpr20 = V_READLANE_B32 $vgpr0, 8
-    ; GCN64-FLATSCR-NEXT: $sgpr21 = V_READLANE_B32 $vgpr0, 9
-    ; GCN64-FLATSCR-NEXT: $sgpr22 = V_READLANE_B32 $vgpr0, 10
-    ; GCN64-FLATSCR-NEXT: $sgpr23 = V_READLANE_B32 $vgpr0, 11
-    ; GCN64-FLATSCR-NEXT: $sgpr24 = V_READLANE_B32 $vgpr0, 12
-    ; GCN64-FLATSCR-NEXT: $sgpr25 = V_READLANE_B32 $vgpr0, 13
-    ; GCN64-FLATSCR-NEXT: $sgpr26 = V_READLANE_B32 $vgpr0, 14
-    ; GCN64-FLATSCR-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+    ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN64-FLATSCR-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 4
+    ; GCN64-FLATSCR-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
+    ; GCN64-FLATSCR-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
+    ; GCN64-FLATSCR-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 7
+    ; GCN64-FLATSCR-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 8
+    ; GCN64-FLATSCR-NEXT: $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 9
+    ; GCN64-FLATSCR-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 10
+    ; GCN64-FLATSCR-NEXT: $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 11
+    ; GCN64-FLATSCR-NEXT: $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 12
+    ; GCN64-FLATSCR-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
+    ; GCN64-FLATSCR-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
+    ; GCN64-FLATSCR-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 15
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4
-    ; GCN64-FLATSCR-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5
-    ; GCN64-FLATSCR-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6
-    ; GCN64-FLATSCR-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7
-    ; GCN64-FLATSCR-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8
-    ; GCN64-FLATSCR-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9
-    ; GCN64-FLATSCR-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10
-    ; GCN64-FLATSCR-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11
-    ; GCN64-FLATSCR-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12
-    ; GCN64-FLATSCR-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13
-    ; GCN64-FLATSCR-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14
-    ; GCN64-FLATSCR-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15
-    ; GCN64-FLATSCR-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16
-    ; GCN64-FLATSCR-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17
-    ; GCN64-FLATSCR-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18
-    ; GCN64-FLATSCR-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19
-    ; GCN64-FLATSCR-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20
-    ; GCN64-FLATSCR-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21
-    ; GCN64-FLATSCR-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22
-    ; GCN64-FLATSCR-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23
-    ; GCN64-FLATSCR-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24
-    ; GCN64-FLATSCR-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25
-    ; GCN64-FLATSCR-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26
-    ; GCN64-FLATSCR-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27
-    ; GCN64-FLATSCR-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28
-    ; GCN64-FLATSCR-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29
-    ; GCN64-FLATSCR-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30
-    ; GCN64-FLATSCR-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31
+    ; GCN64-FLATSCR-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN64-FLATSCR-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+    ; GCN64-FLATSCR-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
+    ; GCN64-FLATSCR-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
+    ; GCN64-FLATSCR-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 4
+    ; GCN64-FLATSCR-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
+    ; GCN64-FLATSCR-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
+    ; GCN64-FLATSCR-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 7
+    ; GCN64-FLATSCR-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 8
+    ; GCN64-FLATSCR-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 9
+    ; GCN64-FLATSCR-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 10
+    ; GCN64-FLATSCR-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 11
+    ; GCN64-FLATSCR-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 12
+    ; GCN64-FLATSCR-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
+    ; GCN64-FLATSCR-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
+    ; GCN64-FLATSCR-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 15
+    ; GCN64-FLATSCR-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 16
+    ; GCN64-FLATSCR-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 17
+    ; GCN64-FLATSCR-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 18
+    ; GCN64-FLATSCR-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 19
+    ; GCN64-FLATSCR-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 20
+    ; GCN64-FLATSCR-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 21
+    ; GCN64-FLATSCR-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 22
+    ; GCN64-FLATSCR-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 23
+    ; GCN64-FLATSCR-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 24
+    ; GCN64-FLATSCR-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 25
+    ; GCN64-FLATSCR-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 26
+    ; GCN64-FLATSCR-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 27
+    ; GCN64-FLATSCR-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 28
+    ; GCN64-FLATSCR-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 29
+    ; GCN64-FLATSCR-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 30
+    ; GCN64-FLATSCR-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 31
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
@@ -1016,7 +1020,7 @@ body:             |
     ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, align 4096, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
+    ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir
index 5a022efb38a35bd..2de7d86223eb247 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir
@@ -1,14 +1,14 @@
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck %s
 
 # CHECK-LABEL: name: empty_entry_block
-# CHECK: V_WRITELANE
-# CHECK-NEXT: V_WRITELANE
-# CHECK-NEXT: V_WRITELANE
-# CHECK-NEXT: V_WRITELANE
-# CHECK: V_READLANE
-# CHECK-NEXT: V_READLANE
-# CHECK-NEXT: V_READLANE
-# CHECK-NEXT: V_READLANE
+# CHECK: SI_SPILL_S32_TO_VGPR
+# CHECK-NEXT: SI_SPILL_S32_TO_VGPR
+# CHECK-NEXT: SI_SPILL_S32_TO_VGPR
+# CHECK-NEXT: SI_SPILL_S32_TO_VGPR
+# CHECK: SI_RESTORE_S32_FROM_VGPR
+# CHECK-NEXT: SI_RESTORE_S32_FROM_VGPR
+# CHECK-NEXT: SI_RESTORE_S32_FROM_VGPR
+# CHECK-NEXT: SI_RESTORE_S32_FROM_VGPR
 ---
 name:            empty_entry_block
 body: |
diff --git a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
index 55639a27fd5c746..b22eb19e3661720 100644
--- a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
@@ -42,14 +42,14 @@ body:             |
   ; CHECK-NEXT:   renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4)
   ; CHECK-NEXT:   dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr38_sgpr39, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
-  ; CHECK-NEXT:   renamable $vgpr1 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr1, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
-  ; CHECK-NEXT:   renamable $vgpr1 = V_WRITELANE_B32 $sgpr5, 1, killed $vgpr1
-  ; CHECK-NEXT:   renamable $vgpr1 = V_WRITELANE_B32 $sgpr6, 2, killed $vgpr1
-  ; CHECK-NEXT:   renamable $vgpr1 = V_WRITELANE_B32 $sgpr7, 3, killed $vgpr1
-  ; CHECK-NEXT:   renamable $vgpr1 = V_WRITELANE_B32 $sgpr8, 4, killed $vgpr1
-  ; CHECK-NEXT:   renamable $vgpr1 = V_WRITELANE_B32 $sgpr9, 5, killed $vgpr1
-  ; CHECK-NEXT:   renamable $vgpr1 = V_WRITELANE_B32 $sgpr10, 6, killed $vgpr1
-  ; CHECK-NEXT:   renamable $vgpr1 = V_WRITELANE_B32 killed $sgpr11, 7, killed $vgpr1, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+  ; CHECK-NEXT:   renamable $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr1, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+  ; CHECK-NEXT:   renamable $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, killed $vgpr1
+  ; CHECK-NEXT:   renamable $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, killed $vgpr1
+  ; CHECK-NEXT:   renamable $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, killed $vgpr1
+  ; CHECK-NEXT:   renamable $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, killed $vgpr1
+  ; CHECK-NEXT:   renamable $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, killed $vgpr1
+  ; CHECK-NEXT:   renamable $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, killed $vgpr1
+  ; CHECK-NEXT:   renamable $vgpr1 = SI_SPILL_S32_TO_VGPR killed $sgpr11, 7, killed $vgpr1, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
   ; CHECK-NEXT:   SI_SPILL_WWM_V32_SAVE killed $vgpr1, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
   ; CHECK-NEXT:   dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr44_sgpr45, 0, 0 :: (invariant load (s64), align 16, addrspace 4)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
@@ -64,14 +64,14 @@ body:             |
   ; CHECK-NEXT:   liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; CHECK-NEXT:   $sgpr4 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
-  ; CHECK-NEXT:   $sgpr5 = V_READLANE_B32 $vgpr1, 1
-  ; CHECK-NEXT:   $sgpr6 = V_READLANE_B32 $vgpr1, 2
-  ; CHECK-NEXT:   $sgpr7 = V_READLANE_B32 $vgpr1, 3
-  ; CHECK-NEXT:   $sgpr8 = V_READLANE_B32 $vgpr1, 4
-  ; CHECK-NEXT:   $sgpr9 = V_READLANE_B32 $vgpr1, 5
-  ; CHECK-NEXT:   $sgpr10 = V_READLANE_B32 $vgpr1, 6
-  ; CHECK-NEXT:   $sgpr11 = V_READLANE_B32 $vgpr1, 7
+  ; CHECK-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+  ; CHECK-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1
+  ; CHECK-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2
+  ; CHECK-NEXT:   $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
+  ; CHECK-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 4
+  ; CHECK-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 5
+  ; CHECK-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 6
+  ; CHECK-NEXT:   $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 7
   ; CHECK-NEXT:   $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; CHECK-NEXT:   $exec = S_MOV_B64 killed $noreg
   ; CHECK-NEXT:   S_BRANCH %bb.3
@@ -81,14 +81,14 @@ body:             |
   ; CHECK-NEXT:   liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; CHECK-NEXT:   $sgpr4 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
-  ; CHECK-NEXT:   $sgpr5 = V_READLANE_B32 $vgpr1, 1
-  ; CHECK-NEXT:   $sgpr6 = V_READLANE_B32 $vgpr1, 2
-  ; CHECK-NEXT:   $sgpr7 = V_READLANE_B32 $vgpr1, 3
-  ; CHECK-NEXT:   $sgpr8 = V_READLANE_B32 $vgpr1, 4
-  ; CHECK-NEXT:   $sgpr9 = V_READLANE_B32 $vgpr1, 5
-  ; CHECK-NEXT:   $sgpr10 = V_READLANE_B32 $vgpr1, 6
-  ; CHECK-NEXT:   $sgpr11 = V_READLANE_B32 $vgpr1, 7
+  ; CHECK-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+  ; CHECK-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1
+  ; CHECK-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2
+  ; CHECK-NEXT:   $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
+  ; CHECK-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 4
+  ; CHECK-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 5
+  ; CHECK-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 6
+  ; CHECK-NEXT:   $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 7
   ; CHECK-NEXT:   S_CMP_LG_U64 renamable $sgpr4_sgpr5, 0, implicit-def $scc
   ; CHECK-NEXT:   $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; CHECK-NEXT:   $exec = S_MOV_B64 killed $noreg
diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
index 95000a143de808f..5da314b9f0e3b1c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
@@ -27,10 +27,10 @@ body:             |
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
     ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
     ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
-    ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, killed $vgpr0
-    ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, killed $vgpr0
-    ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
     ; GCN-NEXT: renamable $sgpr8 = COPY renamable $sgpr1
     ; GCN-NEXT: KILL killed renamable $vgpr0
     ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
@@ -66,10 +66,10 @@ body:             |
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
     ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
     ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
-    ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, killed $vgpr0
-    ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, killed $vgpr0
-    ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
     ; GCN-NEXT: KILL killed renamable $vgpr0
     ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir
index 89c305b82b451cd..d718b4932183512 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir
@@ -12,7 +12,7 @@ body:             |
     ; CHECK-LABEL: name: spill_csr_sgpr_argument
     ; CHECK: liveins: $sgpr50, $vgpr0
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr50, 0, $vgpr0
+    ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr50, 0, $vgpr0
     ; CHECK-NEXT: S_NOP 0, implicit $sgpr50
     ; CHECK-NEXT: $sgpr50 = S_MOV_B32 0
     S_NOP 0, implicit $sgpr50
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
index a77e315dc54a474..efb022ccb0d5568 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
@@ -23,8 +23,8 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: S_NOP 0
-    ; GCN-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[V_WRITELANE_B32_]]
-    ; GCN-NEXT: $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_]], 0
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
+    ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
     ; GCN-NEXT: S_SETPC_B64 $sgpr30_sgpr31
     S_NOP 0
     SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
@@ -56,106 +56,106 @@ body:             |
     ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $vgpr0, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr64, 0, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr65, 1, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr66, 2, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr67, 3, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr68, 4, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr69, 5, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr70, 6, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr71, 7, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr72, 8, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr73, 9, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr74, 10, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr75, 11, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr76, 12, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr77, 13, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr78, 14, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr79, 15, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr80, 16, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr81, 17, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr82, 18, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr83, 19, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr84, 20, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr85, 21, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr86, 22, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr87, 23, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr88, 24, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr89, 25, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr90, 26, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr91, 27, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr92, 28, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr93, 29, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 30, $vgpr0
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr95, 31, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr64, 0, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr65, 1, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr66, 2, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr67, 3, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr68, 4, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr69, 5, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr70, 6, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr71, 7, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr72, 8, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr73, 9, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr74, 10, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr75, 11, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr76, 12, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr77, 13, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr78, 14, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr79, 15, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr80, 16, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr81, 17, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr82, 18, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr83, 19, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr84, 20, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr85, 21, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr86, 22, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr87, 23, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr88, 24, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr89, 25, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr90, 26, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr91, 27, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr92, 28, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr93, 29, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 30, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 31, $vgpr0
     ; GCN-NEXT: S_NOP 0
-    ; GCN-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[V_WRITELANE_B32_]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr64, 1, [[V_WRITELANE_B32_1]], implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr65, 2, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr66, 3, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr67, 4, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr68, 5, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr69, 6, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr70, 7, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr71, 8, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr72, 9, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr73, 10, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr74, 11, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr75, 12, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr76, 13, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr77, 14, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr78, 15, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr79, 16, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr80, 17, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr81, 18, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr82, 19, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr83, 20, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr84, 21, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr85, 22, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr86, 23, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr87, 24, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr88, 25, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr89, 26, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr90, 27, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr91, 28, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr92, 29, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr93, 30, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr94, 31, [[V_WRITELANE_B32_1]]
-    ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr95, 32, [[V_WRITELANE_B32_1]], implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr64, 1, [[DEF]], implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr65, 2, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr66, 3, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr67, 4, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr68, 5, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr69, 6, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr70, 7, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr71, 8, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr72, 9, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr73, 10, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr74, 11, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr75, 12, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr76, 13, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr77, 14, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr78, 15, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr79, 16, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr80, 17, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr81, 18, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr82, 19, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr83, 20, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr84, 21, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr85, 22, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr86, 23, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr87, 24, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr88, 25, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr89, 26, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr90, 27, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr91, 28, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr92, 29, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr93, 30, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr94, 31, [[DEF]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 32, [[DEF]], implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN-NEXT: S_NOP 0
-    ; GCN-NEXT: $sgpr64 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN-NEXT: $sgpr65 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
-    ; GCN-NEXT: $sgpr66 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
-    ; GCN-NEXT: $sgpr67 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
-    ; GCN-NEXT: $sgpr68 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
-    ; GCN-NEXT: $sgpr69 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6
-    ; GCN-NEXT: $sgpr70 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 7
-    ; GCN-NEXT: $sgpr71 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 8
-    ; GCN-NEXT: $sgpr72 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 9
-    ; GCN-NEXT: $sgpr73 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 10
-    ; GCN-NEXT: $sgpr74 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 11
-    ; GCN-NEXT: $sgpr75 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 12
-    ; GCN-NEXT: $sgpr76 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 13
-    ; GCN-NEXT: $sgpr77 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 14
-    ; GCN-NEXT: $sgpr78 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 15
-    ; GCN-NEXT: $sgpr79 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 16
-    ; GCN-NEXT: $sgpr80 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 17
-    ; GCN-NEXT: $sgpr81 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 18
-    ; GCN-NEXT: $sgpr82 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 19
-    ; GCN-NEXT: $sgpr83 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 20
-    ; GCN-NEXT: $sgpr84 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 21
-    ; GCN-NEXT: $sgpr85 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 22
-    ; GCN-NEXT: $sgpr86 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 23
-    ; GCN-NEXT: $sgpr87 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 24
-    ; GCN-NEXT: $sgpr88 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 25
-    ; GCN-NEXT: $sgpr89 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 26
-    ; GCN-NEXT: $sgpr90 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 27
-    ; GCN-NEXT: $sgpr91 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 28
-    ; GCN-NEXT: $sgpr92 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 29
-    ; GCN-NEXT: $sgpr93 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 30
-    ; GCN-NEXT: $sgpr94 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 31
-    ; GCN-NEXT: $sgpr95 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 32
-    ; GCN-NEXT: $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0
+    ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+    ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+    ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+    ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+    ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+    ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
+    ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
+    ; GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 8
+    ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 9
+    ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 10
+    ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 11
+    ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 12
+    ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 13
+    ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 14
+    ; GCN-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 15
+    ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 16
+    ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 17
+    ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 18
+    ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 19
+    ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 20
+    ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 21
+    ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 22
+    ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 23
+    ; GCN-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 24
+    ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 25
+    ; GCN-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 26
+    ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 27
+    ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 28
+    ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 29
+    ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 30
+    ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 31
+    ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 32
+    ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
     ; GCN-NEXT: S_SETPC_B64 $sgpr30_sgpr31
     S_NOP 0
     SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
@@ -197,7 +197,7 @@ body:             |
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr10 = S_MOV_B32 10
-  ; GCN-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[V_WRITELANE_B32_]]
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
   ; GCN-NEXT:   S_BRANCH %bb.3
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.2:
@@ -205,13 +205,13 @@ body:             |
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr10 = S_MOV_B32 20
-  ; GCN-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[V_WRITELANE_B32_1]]
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
   ; GCN-NEXT:   S_BRANCH %bb.3
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.3:
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0
+  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
   ; GCN-NEXT:   S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr10
   bb.0:
     liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
@@ -264,7 +264,7 @@ body:             |
   ; GCN-NEXT:   successors: %bb.2(0x80000000)
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr10 = V_READLANE_B32 [[DEF]], 0
+  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
   ; GCN-NEXT:   $sgpr10 = S_ADD_I32 $sgpr10, 15, implicit-def dead $scc
   ; GCN-NEXT:   S_BRANCH %bb.2
   ; GCN-NEXT: {{  $}}
@@ -272,7 +272,7 @@ body:             |
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr10 = V_READLANE_B32 [[DEF]], 0
+  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
   ; GCN-NEXT:   $sgpr10 = S_ADD_I32 $sgpr10, 20, implicit-def dead $scc
   ; GCN-NEXT:   S_BRANCH %bb.3
   ; GCN-NEXT: {{  $}}
@@ -281,7 +281,7 @@ body:             |
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr10 = S_MOV_B32 10
-  ; GCN-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[V_WRITELANE_B32_]]
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
   ; GCN-NEXT:   S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
   ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
   ; GCN-NEXT:   S_BRANCH %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
index 69442032f37ec85..3892ceb418959f1 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
@@ -51,8 +51,8 @@ body:             |
     ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
+    ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
+    ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit $vcc
     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -60,8 +60,8 @@ body:             |
     ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
+    ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
+    ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit killed $vcc
     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -69,10 +69,11 @@ body:             |
     ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GFX9-NEXT: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc
-    ; GFX9-NEXT: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1
+    ; GFX9-NEXT: $vcc_lo = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $vcc
+    ; GFX9-NEXT: $vcc_hi = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
     ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ;
     ; GFX10-LABEL: name: check_vcc
     ; GFX10: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9
     ; GFX10-NEXT: {{  $}}
@@ -87,8 +88,8 @@ body:             |
     ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
+    ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
+    ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit $vcc
     ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -96,8 +97,8 @@ body:             |
     ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
+    ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
+    ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit killed $vcc
     ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -105,10 +106,11 @@ body:             |
     ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GFX10-NEXT: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc
-    ; GFX10-NEXT: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1
+    ; GFX10-NEXT: $vcc_lo = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $vcc
+    ; GFX10-NEXT: $vcc_hi = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
     ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
+    ;
     ; GFX11-LABEL: name: check_vcc
     ; GFX11: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7
     ; GFX11-NEXT: {{  $}}
@@ -117,8 +119,8 @@ body:             |
     ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
+    ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
+    ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit $vcc
     ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -126,8 +128,8 @@ body:             |
     ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
-    ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
+    ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
+    ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit killed $vcc
     ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
@@ -135,8 +137,8 @@ body:             |
     ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
     ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
     ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
-    ; GFX11-NEXT: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc
-    ; GFX11-NEXT: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1
+    ; GFX11-NEXT: $vcc_lo = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $vcc
+    ; GFX11-NEXT: $vcc_hi = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
     ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
     ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     $vcc = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll b/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll
index c1fc297d456435b..c1c69ce568a9c40 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll
@@ -26,22 +26,14 @@ define void @sgpr_spill_writelane() {
   ret void
 }
 
-; FIXME: The writelane intrinsic doesn't really overwrite any inactive lanes
-; and hence there is no need to preserve the VGPR it modifies.
 define void @device_writelane_intrinsic(ptr addrspace(1) %out, i32 %src) {
 ; GCN-LABEL: device_writelane_intrinsic:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v3, off, s[0:3], s32 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v3, 15
 ; GCN-NEXT:    v_readfirstlane_b32 s4, v2
 ; GCN-NEXT:    v_writelane_b32 v3, s4, 23
 ; GCN-NEXT:    global_store_dword v[0:1], v3, off
-; GCN-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v3, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %writelane = call i32 @llvm.amdgcn.writelane(i32 %src, i32 23, i32 15)
diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir
index ff9aeecfaa67395..9371a72d6992b54 100644
--- a/llvm/test/CodeGen/AMDGPU/spill192.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill192.mir
@@ -29,18 +29,19 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s192) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ;
   ; EXPANDED-LABEL: name: spill_restore_sgpr192
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr9, 5, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr9, 5, [[DEF]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -49,12 +50,12 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
-  ; EXPANDED-NEXT:   $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
-  ; EXPANDED-NEXT:   $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
-  ; EXPANDED-NEXT:   $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
-  ; EXPANDED-NEXT:   $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
-  ; EXPANDED-NEXT:   $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
+  ; EXPANDED-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; EXPANDED-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
+  ; EXPANDED-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+  ; EXPANDED-NEXT:   $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+  ; EXPANDED-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+  ; EXPANDED-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
   bb.0:
     S_NOP 0, implicit-def %0:sgpr_192
@@ -90,6 +91,7 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+  ;
   ; EXPANDED-LABEL: name: spill_restore_vgpr192
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/spill224.mir b/llvm/test/CodeGen/AMDGPU/spill224.mir
index 0f84821687d7163..2b63abe34eed69b 100644
--- a/llvm/test/CodeGen/AMDGPU/spill224.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill224.mir
@@ -27,19 +27,20 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 = SI_SPILL_S224_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s224) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ;
   ; EXPANDED-LABEL: name: spill_restore_sgpr224
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr9, 5, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 6, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 6, [[DEF]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -48,13 +49,13 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
-  ; EXPANDED-NEXT:   $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
-  ; EXPANDED-NEXT:   $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
-  ; EXPANDED-NEXT:   $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
-  ; EXPANDED-NEXT:   $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
-  ; EXPANDED-NEXT:   $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
-  ; EXPANDED-NEXT:   $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6
+  ; EXPANDED-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
+  ; EXPANDED-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+  ; EXPANDED-NEXT:   $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+  ; EXPANDED-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+  ; EXPANDED-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+  ; EXPANDED-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
   bb.0:
     S_NOP 0, implicit-def %0:sgpr_224
@@ -90,6 +91,7 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s224) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+  ;
   ; EXPANDED-LABEL: name: spill_restore_vgpr224
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/spill288.mir b/llvm/test/CodeGen/AMDGPU/spill288.mir
index 219d69fa2d46a1d..29df0b16c1eaa8e 100644
--- a/llvm/test/CodeGen/AMDGPU/spill288.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill288.mir
@@ -27,21 +27,22 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 = SI_SPILL_S288_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s288) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
+  ;
   ; EXPANDED-LABEL: name: spill_restore_sgpr288
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr9, 5, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr10, 6, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr11, 7, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr12, 8, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 8, [[DEF]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -50,15 +51,15 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
-  ; EXPANDED-NEXT:   $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
-  ; EXPANDED-NEXT:   $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
-  ; EXPANDED-NEXT:   $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
-  ; EXPANDED-NEXT:   $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
-  ; EXPANDED-NEXT:   $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
-  ; EXPANDED-NEXT:   $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6
-  ; EXPANDED-NEXT:   $sgpr11 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 7
-  ; EXPANDED-NEXT:   $sgpr12 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 8
+  ; EXPANDED-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
+  ; EXPANDED-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
+  ; EXPANDED-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+  ; EXPANDED-NEXT:   $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+  ; EXPANDED-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+  ; EXPANDED-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+  ; EXPANDED-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
+  ; EXPANDED-NEXT:   $sgpr11 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
+  ; EXPANDED-NEXT:   $sgpr12 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 8
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
   bb.0:
     S_NOP 0, implicit-def %0:sgpr_288
@@ -94,6 +95,7 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 = SI_SPILL_V288_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
+  ;
   ; EXPANDED-LABEL: name: spill_restore_vgpr288
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/spill320.mir b/llvm/test/CodeGen/AMDGPU/spill320.mir
index dcb4b9dda8783c5..d7ded7d8f01b480 100644
--- a/llvm/test/CodeGen/AMDGPU/spill320.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill320.mir
@@ -27,22 +27,23 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 = SI_SPILL_S320_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s320) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+  ;
   ; EXPANDED-LABEL: name: spill_restore_sgpr320
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr9, 5, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr10, 6, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr11, 7, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr12, 8, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr13, 9, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr13, 9, [[DEF]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -51,16 +52,16 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
-  ; EXPANDED-NEXT:   $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
-  ; EXPANDED-NEXT:   $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
-  ; EXPANDED-NEXT:   $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
-  ; EXPANDED-NEXT:   $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
-  ; EXPANDED-NEXT:   $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
-  ; EXPANDED-NEXT:   $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6
-  ; EXPANDED-NEXT:   $sgpr11 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 7
-  ; EXPANDED-NEXT:   $sgpr12 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 8
-  ; EXPANDED-NEXT:   $sgpr13 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 9
+  ; EXPANDED-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+  ; EXPANDED-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
+  ; EXPANDED-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+  ; EXPANDED-NEXT:   $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+  ; EXPANDED-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+  ; EXPANDED-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+  ; EXPANDED-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
+  ; EXPANDED-NEXT:   $sgpr11 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
+  ; EXPANDED-NEXT:   $sgpr12 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 8
+  ; EXPANDED-NEXT:   $sgpr13 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 9
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
   bb.0:
     S_NOP 0, implicit-def %0:sgpr_320
@@ -96,6 +97,7 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = SI_SPILL_V320_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+  ;
   ; EXPANDED-LABEL: name: spill_restore_vgpr320
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/spill352.mir b/llvm/test/CodeGen/AMDGPU/spill352.mir
index 2ed4437aa57cd16..fc8e72eca05ff31 100644
--- a/llvm/test/CodeGen/AMDGPU/spill352.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill352.mir
@@ -27,23 +27,24 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 = SI_SPILL_S352_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s352) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
+  ;
   ; EXPANDED-LABEL: name: spill_restore_sgpr352
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr9, 5, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr10, 6, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr11, 7, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr12, 8, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr13, 9, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr14, 10, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr14, 10, [[DEF]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -52,17 +53,17 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
-  ; EXPANDED-NEXT:   $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
-  ; EXPANDED-NEXT:   $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
-  ; EXPANDED-NEXT:   $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
-  ; EXPANDED-NEXT:   $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
-  ; EXPANDED-NEXT:   $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
-  ; EXPANDED-NEXT:   $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6
-  ; EXPANDED-NEXT:   $sgpr11 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 7
-  ; EXPANDED-NEXT:   $sgpr12 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 8
-  ; EXPANDED-NEXT:   $sgpr13 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 9
-  ; EXPANDED-NEXT:   $sgpr14 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 10
+  ; EXPANDED-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
+  ; EXPANDED-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
+  ; EXPANDED-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+  ; EXPANDED-NEXT:   $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+  ; EXPANDED-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+  ; EXPANDED-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+  ; EXPANDED-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
+  ; EXPANDED-NEXT:   $sgpr11 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
+  ; EXPANDED-NEXT:   $sgpr12 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 8
+  ; EXPANDED-NEXT:   $sgpr13 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 9
+  ; EXPANDED-NEXT:   $sgpr14 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 10
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
   bb.0:
     S_NOP 0, implicit-def %0:sgpr_352
@@ -98,6 +99,7 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 = SI_SPILL_V352_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10
+  ;
   ; EXPANDED-LABEL: name: spill_restore_vgpr352
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/spill384.mir b/llvm/test/CodeGen/AMDGPU/spill384.mir
index c88bfe20a75fe32..3d6fe33f98950f7 100644
--- a/llvm/test/CodeGen/AMDGPU/spill384.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill384.mir
@@ -27,24 +27,25 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = SI_SPILL_S384_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s384) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+  ;
   ; EXPANDED-LABEL: name: spill_restore_sgpr384
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr9, 5, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr10, 6, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr11, 7, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr12, 8, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr13, 9, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr14, 10, [[V_WRITELANE_B32_1]]
-  ; EXPANDED-NEXT:   [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr15, 11, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, [[DEF]]
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr15, 11, [[DEF]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -53,18 +54,18 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-  ; EXPANDED-NEXT:   $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
-  ; EXPANDED-NEXT:   $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
-  ; EXPANDED-NEXT:   $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
-  ; EXPANDED-NEXT:   $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
-  ; EXPANDED-NEXT:   $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
-  ; EXPANDED-NEXT:   $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6
-  ; EXPANDED-NEXT:   $sgpr11 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 7
-  ; EXPANDED-NEXT:   $sgpr12 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 8
-  ; EXPANDED-NEXT:   $sgpr13 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 9
-  ; EXPANDED-NEXT:   $sgpr14 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 10
-  ; EXPANDED-NEXT:   $sgpr15 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 11
+  ; EXPANDED-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+  ; EXPANDED-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
+  ; EXPANDED-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+  ; EXPANDED-NEXT:   $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+  ; EXPANDED-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+  ; EXPANDED-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+  ; EXPANDED-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
+  ; EXPANDED-NEXT:   $sgpr11 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
+  ; EXPANDED-NEXT:   $sgpr12 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 8
+  ; EXPANDED-NEXT:   $sgpr13 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 9
+  ; EXPANDED-NEXT:   $sgpr14 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 10
+  ; EXPANDED-NEXT:   $sgpr15 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 11
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
   bb.0:
     S_NOP 0, implicit-def %0:sgpr_384
@@ -100,6 +101,7 @@ body: |
   ; SPILLED-NEXT: bb.2:
   ; SPILLED-NEXT:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = SI_SPILL_V384_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
   ; SPILLED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
+  ;
   ; EXPANDED-LABEL: name: spill_restore_vgpr384
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
index c4fde5dd2da4562..2c4a5dba3520cd4 100644
--- a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
+++ b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
@@ -23,14 +23,14 @@ body:             |
     ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
     ; GCN-NEXT: $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0
     $vgpr0 = IMPLICIT_DEF
-    $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0
+    $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
     $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
     SI_RETURN implicit $vgpr0
 ...
@@ -58,8 +58,8 @@ body:             |
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
-    ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0
-    ; GCN-NEXT: $vgpr2 = V_WRITELANE_B32 killed $sgpr21, 0, $vgpr2
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
+    ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr21, 0, $vgpr2
     ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
@@ -68,8 +68,8 @@ body:             |
     ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
     $vgpr0 = IMPLICIT_DEF
     $vgpr2 = IMPLICIT_DEF
-    $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0
-    $vgpr2 = V_WRITELANE_B32 killed $sgpr21, 0, $vgpr2
+    $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
+    $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr21, 0, $vgpr2
     $vgpr0 = COPY $vgpr1, implicit $exec
     SI_RETURN implicit $vgpr0_vgpr1
 ...
@@ -95,14 +95,14 @@ body:             |
     ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
-    ; GCN-NEXT: $vgpr2 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr2
+    ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
     ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
     $vgpr2 = IMPLICIT_DEF
-    $vgpr2 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr2
+    $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
     $vgpr0 = COPY $vgpr1, implicit $exec
     SI_RETURN implicit $vgpr0_vgpr1
 ...
@@ -127,16 +127,16 @@ body:             |
     ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr40 = IMPLICIT_DEF
-    ; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr40
-    ; GCN-NEXT: $sgpr20 = V_READLANE_B32 $vgpr40, 0, implicit $exec
+    ; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr40
+    ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0, implicit $exec
     ; GCN-NEXT: $vgpr0 = COPY killed $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0
     $vgpr40 = IMPLICIT_DEF
-    $vgpr40 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr40
-    $sgpr20 = V_READLANE_B32 $vgpr40, 0, implicit $exec
+    $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr40
+    $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0, implicit $exec
     $vgpr0 = COPY killed $vgpr1, implicit $exec
     SI_RETURN implicit $vgpr0
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir b/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir
index e5aec9987bdad0b..4122a530ee861e4 100644
--- a/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir
@@ -18,15 +18,9 @@ body:             |
     ; GCN-LABEL: name: vgpr_use_after_prolog_spill
     ; GCN: liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
     ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0
-    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
     $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0
@@ -48,15 +42,9 @@ body:             |
     ; GCN-LABEL: name: livein_vgpr_def_after_prolog_spill
     ; GCN: liveins: $sgpr42, $vgpr0, $vgpr1, $sgpr30_sgpr31
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr1, implicit $exec
     ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0
-    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr1, implicit $exec
     $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0
@@ -77,9 +65,6 @@ body:             |
   ; GCN-NEXT:   successors: %bb.1(0x80000000)
   ; GCN-NEXT:   liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-  ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; GCN-NEXT:   S_NOP 0
   ; GCN-NEXT:   S_BRANCH %bb.1
   ; GCN-NEXT: {{  $}}
@@ -88,9 +73,6 @@ body:             |
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0
   ; GCN-NEXT:   S_NOP 0, implicit-def $vgpr0
-  ; GCN-NEXT:   $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-  ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; GCN-NEXT:   S_SETPC_B64_return $sgpr30_sgpr31
   bb.0:
     liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
index 3a4b4372b8b2d3a..33fb5951572561d 100644
--- a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
+++ b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
@@ -46,7 +46,7 @@ body:             |
   ; MUBUF-NEXT:   $sgpr5 = S_ADD_I32 $sgpr33, 9961728, implicit-def dead $scc
   ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.20, addrspace 5)
   ; MUBUF-NEXT:   $exec = S_MOV_B64 killed $sgpr6_sgpr7
-  ; MUBUF-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
+  ; MUBUF-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
   ; MUBUF-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 11010048, implicit-def dead $scc
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
@@ -67,7 +67,7 @@ body:             |
   ; MUBUF-NEXT: bb.2:
   ; MUBUF-NEXT:   liveins: $vgpr2
   ; MUBUF-NEXT: {{  $}}
-  ; MUBUF-NEXT:   $sgpr4 = V_READLANE_B32 $vgpr2, 0
+  ; MUBUF-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
   ; MUBUF-NEXT:   $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; MUBUF-NEXT:   $sgpr5 = S_ADD_I32 $sgpr33, 9961728, implicit-def dead $scc
   ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.20, addrspace 5)
@@ -75,6 +75,7 @@ body:             |
   ; MUBUF-NEXT:   $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -11010048, implicit-def dead $scc
   ; MUBUF-NEXT:   $sgpr33 = COPY $sgpr4
   ; MUBUF-NEXT:   S_ENDPGM 0
+  ;
   ; FLATSCR-LABEL: name: use_restore_frame_reg
   ; FLATSCR: bb.0:
   ; FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
@@ -87,7 +88,7 @@ body:             |
   ; FLATSCR-NEXT:   $sgpr5 = S_ADD_I32 $sgpr33, 155652, implicit-def dead $scc
   ; FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.20, addrspace 5)
   ; FLATSCR-NEXT:   $exec = S_MOV_B64 killed $sgpr6_sgpr7
-  ; FLATSCR-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
+  ; FLATSCR-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
   ; FLATSCR-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 172032, implicit-def dead $scc
   ; FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; FLATSCR-NEXT:   S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
@@ -117,7 +118,7 @@ body:             |
   ; FLATSCR-NEXT: bb.2:
   ; FLATSCR-NEXT:   liveins: $vgpr2
   ; FLATSCR-NEXT: {{  $}}
-  ; FLATSCR-NEXT:   $sgpr4 = V_READLANE_B32 $vgpr2, 0
+  ; FLATSCR-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
   ; FLATSCR-NEXT:   $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; FLATSCR-NEXT:   $sgpr5 = S_ADD_I32 $sgpr33, 155652, implicit-def dead $scc
   ; FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.20, addrspace 5)

>From a218ab9cdedf16aee107e89037405ac07cc2a320 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Mon, 23 Oct 2023 14:03:03 +0530
Subject: [PATCH 2/2] [AMDGPU] Improve isBasicBlockPrologue helper function

This function helps to skip over the basic block prolog
instructions while inserting a new spill or a copy during
liverange split. However, this appears to be incomplete.
It currently skips only the instructions that restores
the exec mask. It should also have skipped over the BB
prolog spills and the liverange split copies inserted
earlier during regalloc while dealing with the BBLiveIns.

This patch partially addresses the limitation by including
only the spills in the query. The live-range-split COPY
handling is pending.

This helper function is also used at multiple places to
skip over the bb prolog. They should continue produce
functionally correct code even after this patch.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  10 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |   5 +
 .../GlobalISel/image-waterfall-loop-O0.ll     |   5 +-
 llvm/test/CodeGen/AMDGPU/collapse-endcf.ll    |  71 +--
 .../AMDGPU/control-flow-fastregalloc.ll       |  18 +-
 .../AMDGPU/cross-block-use-is-not-abi-copy.ll |   2 +-
 ...uf-legalize-operands-non-ptr-intrinsics.ll |  54 +-
 .../CodeGen/AMDGPU/mubuf-legalize-operands.ll |  58 ++-
 .../AMDGPU/partial-sgpr-to-vgpr-spills.ll     |  33 +-
 .../CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll     |   7 +-
 .../CodeGen/AMDGPU/spill-scavenge-offset.ll   | 489 +++++++++---------
 llvm/test/CodeGen/AMDGPU/swdev380865.ll       |  16 +-
 .../AMDGPU/vgpr-spill-placement-issue61083.ll |   3 +-
 .../test/CodeGen/AMDGPU/wwm-reserved-spill.ll |   6 +-
 14 files changed, 410 insertions(+), 367 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 83690fcee30958d..7a3d7582b181d6f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8335,8 +8335,14 @@ unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg,
 }
 
 bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
-  return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
-         MI.modifiesRegister(AMDGPU::EXEC, &RI);
+  // It should cover all possible instructions inserted during regalloc in the
+  // block prolog.
+  uint16_t Opc = MI.getOpcode();
+
+  // FIXME: Copies inserted in the block prolog for live-range split should also
+  // be included.
+  return (isSpillOpcode(Opc) || (!MI.isTerminator() && Opc != AMDGPU::COPY &&
+                                 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
 }
 
 MachineInstrBuilder
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 5ef17c44f7de389..34453c41109580d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -671,6 +671,11 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
   }
 
+  bool isSpillOpcode(uint16_t Opcode) const {
+    return get(Opcode).TSFlags &
+           (SIInstrFlags::SGPRSpill | SIInstrFlags::VGPRSpill);
+  }
+
   static bool isWWMRegSpillOpcode(uint16_t Opcode) {
     return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
            Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
index b19230c2e876c4f..10cbc56cc5fbea1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
@@ -144,8 +144,6 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s21
 ; CHECK-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_or_saveexec_b32 s21, -1
 ; CHECK-NEXT:    buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s21
@@ -163,6 +161,9 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
 ; CHECK-NEXT:    v_readlane_b32 s17, v2, 1
 ; CHECK-NEXT:    v_readlane_b32 s18, v2, 2
 ; CHECK-NEXT:    v_readlane_b32 s19, v2, 3
+; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 73d5088141cdb31..dc519df52919add 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -75,10 +75,10 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_waitcnt vmcnt(1)
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s4, v0, 0
 ; GCN-O0-NEXT:    v_readlane_b32 s5, v0, 1
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s2, 0xf000
 ; GCN-O0-NEXT:    s_mov_b32 s0, 0
 ; GCN-O0-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
@@ -104,7 +104,6 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
 ; GCN-O0-NEXT:    s_cbranch_execz .LBB0_3
 ; GCN-O0-NEXT:  ; %bb.2: ; %bb.inner.then
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -112,7 +111,9 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 0
 ; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 1
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_add_i32_e64 v1, s[2:3], v1, v0
 ; GCN-O0-NEXT:    v_ashrrev_i32_e64 v3, 31, v1
 ; GCN-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -248,10 +249,10 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_waitcnt vmcnt(1)
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s4, v0, 0
 ; GCN-O0-NEXT:    v_readlane_b32 s5, v0, 1
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s2, 0xf000
 ; GCN-O0-NEXT:    s_mov_b32 s0, 0
 ; GCN-O0-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
@@ -277,7 +278,6 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
 ; GCN-O0-NEXT:    s_cbranch_execz .LBB1_4
 ; GCN-O0-NEXT:  ; %bb.2: ; %bb.inner.then
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -285,7 +285,9 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 0
 ; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 1
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_add_i32_e64 v1, s[2:3], v1, v0
 ; GCN-O0-NEXT:    v_ashrrev_i32_e64 v3, 31, v1
 ; GCN-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -311,7 +313,6 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GCN-O0-NEXT:    s_branch .LBB1_5
 ; GCN-O0-NEXT:  .LBB1_4: ; %bb.inner.end
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -322,7 +323,9 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 0
 ; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 2
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_add_i32_e64 v1, s[2:3], v1, v0
 ; GCN-O0-NEXT:    v_ashrrev_i32_e64 v3, 31, v1
 ; GCN-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -508,7 +511,6 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_xor_b64 exec, exec, s[0:1]
 ; GCN-O0-NEXT:    s_cbranch_execz .LBB2_5
 ; GCN-O0-NEXT:  ; %bb.3: ; %bb.then
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -516,7 +518,9 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 0
 ; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 1
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_add_i32_e64 v1, s[2:3], v1, v0
 ; GCN-O0-NEXT:    v_ashrrev_i32_e64 v3, 31, v1
 ; GCN-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -532,7 +536,6 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GCN-O0-NEXT:    s_branch .LBB2_5
 ; GCN-O0-NEXT:  .LBB2_4: ; %bb.else
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -540,7 +543,9 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 0
 ; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 2
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_add_i32_e64 v1, s[2:3], v1, v0
 ; GCN-O0-NEXT:    v_ashrrev_i32_e64 v3, 31, v1
 ; GCN-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -954,20 +959,21 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
 ; GCN-O0-NEXT:    s_cbranch_execz .LBB4_2
 ; GCN-O0-NEXT:  ; %bb.1: ; %bb.then
-; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s0, v1, 0
 ; GCN-O0-NEXT:    v_readlane_b32 s1, v1, 1
+; GCN-O0-NEXT:    s_waitcnt expcnt(0)
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s2, 0xf000
 ; GCN-O0-NEXT:    s_mov_b32 s4, 0
 ; GCN-O0-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
 ; GCN-O0-NEXT:    s_mov_b32 s5, s2
 ; GCN-O0-NEXT:    ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
 ; GCN-O0-NEXT:    s_mov_b64 s[2:3], s[4:5]
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_ashrrev_i32_e64 v2, 31, v0
 ; GCN-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
 ; GCN-O0-NEXT:    v_mov_b32_e32 v1, v2
@@ -1103,14 +1109,14 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_waitcnt vmcnt(1)
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s8, v0, 2
 ; GCN-O0-NEXT:    v_readlane_b32 s9, v0, 3
 ; GCN-O0-NEXT:    v_readlane_b32 s6, v0, 0
 ; GCN-O0-NEXT:    v_readlane_b32 s7, v0, 1
 ; GCN-O0-NEXT:    v_writelane_b32 v0, s6, 4
 ; GCN-O0-NEXT:    v_writelane_b32 v0, s7, 5
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s4, 0x207
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, s4
@@ -1133,11 +1139,11 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_waitcnt vmcnt(1)
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s4, v0, 6
 ; GCN-O0-NEXT:    v_readlane_b32 s5, v0, 7
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s6, 0
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_cmp_ne_u32_e64 s[4:5], v1, s6
@@ -1227,18 +1233,20 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    s_branch .LBB5_6
 ; GCN-O0-NEXT:  .LBB5_5: ; %Flow2
 ; GCN-O0-NEXT:    ; in Loop: Header=BB5_1 Depth=1
-; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
+; GCN-O0-NEXT:    s_waitcnt expcnt(1)
 ; GCN-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s4, v4, 10
 ; GCN-O0-NEXT:    v_readlane_b32 s5, v4, 11
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN-O0-NEXT:    s_waitcnt expcnt(0)
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
@@ -1247,18 +1255,20 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    s_branch .LBB5_7
 ; GCN-O0-NEXT:  .LBB5_6: ; %Flow
 ; GCN-O0-NEXT:    ; in Loop: Header=BB5_1 Depth=1
-; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
+; GCN-O0-NEXT:    s_waitcnt expcnt(1)
 ; GCN-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s4, v4, 12
 ; GCN-O0-NEXT:    v_readlane_b32 s5, v4, 13
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN-O0-NEXT:    s_waitcnt expcnt(0)
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
@@ -1302,11 +1312,6 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:  .LBB5_9: ; %Flow3
 ; GCN-O0-NEXT:    ; in Loop: Header=BB5_1 Depth=1
-; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
 ; GCN-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
@@ -1318,6 +1323,11 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    v_readlane_b32 s7, v4, 5
 ; GCN-O0-NEXT:    v_readlane_b32 s4, v4, 14
 ; GCN-O0-NEXT:    v_readlane_b32 s5, v4, 15
+; GCN-O0-NEXT:    s_waitcnt expcnt(0)
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_and_b64 s[4:5], exec, s[4:5]
 ; GCN-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-O0-NEXT:    s_mov_b64 s[6:7], 0
@@ -1332,6 +1342,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
 ; GCN-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
+; GCN-O0-NEXT:    s_waitcnt vmcnt(1)
 ; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index d38e0072c94a5ab..6ef14d31ffa891d 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -46,9 +46,6 @@
 
 ; VMEM: [[ENDIF]]:
 
-; Restore val
-; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload
-
 ; Reload and restore exec mask
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
@@ -60,6 +57,9 @@
 
 ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
 
+; Restore val
+; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload
+
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]]
 
 ; VGPR: .amdhsa_private_segment_fixed_size 16
@@ -120,7 +120,6 @@ endif:
 ; GCN: buffer_store_dword v[[VAL_LOOP_RELOAD]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill
 
 ; GCN: [[END]]:
-; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
 
@@ -130,6 +129,7 @@ endif:
 ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1
 
 ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
+; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload
 
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]]
 
@@ -176,7 +176,7 @@ end:
 ; Spill saved exec
 ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
 ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
-; VGPR: buffer_store_dword [[SPILL_VGPR]], off, s[0:3], 0 offset:[[VREG_SAVE_RESTORE_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; VGPR: buffer_store_dword [[SPILL_VGPR]], off, s[0:3], 0 offset:[[SAVEEXEC_OFFSET:[0-9]+]] ; 4-byte Folded Spill
 
 ; VMEM: v_writelane_b32 v[[V_SAVEEXEC:[0-9]+]], s[[SAVEEXEC_LO]], 0
 ; VMEM: v_writelane_b32 v[[V_SAVEEXEC]], s[[SAVEEXEC_HI]], 1
@@ -189,8 +189,7 @@ end:
 ; GCN-NEXT: s_branch [[ELSE:.LBB[0-9]+_[0-9]+]]
 
 ; GCN: [[FLOW]]: ; %Flow
-; VGPR: buffer_load_dword [[SPILL_VGPR:v[0-9]+]], off, s[0:3], 0 offset:[[VREG_SAVE_RESTORE_OFFSET]] ; 4-byte Folded Reload
-; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload
+; VGPR: buffer_load_dword [[SPILL_VGPR:v[0-9]+]], off, s[0:3], 0 offset:[[SAVEEXEC_OFFSET]] ; 4-byte Folded Reload
 ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
 ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
 
@@ -201,6 +200,8 @@ end:
 
 ; GCN: s_or_saveexec_b64 s[[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC:[0-9]+]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC:[0-9]+]]], s[[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]]
 
+; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload
+
 ; Regular spill value restored after exec modification
 ; Followed by spill
 ; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], 0 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill
@@ -233,7 +234,6 @@ end:
 ; GCN-NEXT: s_branch [[FLOW]]
 
 ; GCN: [[ENDIF]]:
-; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_LO_LANE]]
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]]
 
@@ -245,6 +245,8 @@ end:
 
 ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
 
+; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload
+
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]]
 define amdgpu_kernel void @divergent_if_else_endif(ptr addrspace(1) %out) #0 {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
index 019b5ab593fe360..f97d15739be2965 100644
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -148,9 +148,9 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
 ; GCN-NEXT:    s_add_u32 s16, s16, func_struct at rel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s17, s17, func_struct at rel32@hi+12
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    v_mov_b32_e32 v1, v4
 ; GCN-NEXT:    v_readlane_b32 s31, v40, 1
 ; GCN-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, v4
 ; GCN-NEXT:    v_readlane_b32 s4, v40, 2
 ; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
index c877740c1baa9f0..fd261e8e2bb25e3 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
@@ -210,7 +210,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -222,7 +221,9 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
 ; W64-O0-NEXT:    v_readlane_b32 s10, v1, 5
 ; W64-O0-NEXT:    v_readlane_b32 s11, v1, 6
 ; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT:    s_nop 4
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
@@ -232,11 +233,11 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_waitcnt vmcnt(1)
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    v_readlane_b32 s4, v1, 1
 ; W64-O0-NEXT:    v_readlane_b32 s5, v1, 2
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    ; kill: killed $vgpr1
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -601,7 +602,6 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -613,7 +613,9 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
 ; W64-O0-NEXT:    v_readlane_b32 s10, v1, 5
 ; W64-O0-NEXT:    v_readlane_b32 s11, v1, 6
 ; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT:    s_nop 4
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
@@ -668,7 +670,6 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.5: ; in Loop: Header=BB1_4 Depth=1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -680,7 +681,9 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
 ; W64-O0-NEXT:    v_readlane_b32 s10, v1, 13
 ; W64-O0-NEXT:    v_readlane_b32 s11, v1, 14
 ; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT:    s_nop 4
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
@@ -690,16 +693,16 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    v_readlane_b32 s4, v0, 9
+; W64-O0-NEXT:    v_readlane_b32 s5, v0, 10
+; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_waitcnt vmcnt(6)
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 9
-; W64-O0-NEXT:    v_readlane_b32 s5, v0, 10
-; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    global_store_dword v[4:5], v6, off
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -1119,7 +1122,6 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB2_1 Depth=1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -1131,7 +1133,9 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
 ; W64-O0-NEXT:    v_readlane_b32 s10, v1, 6
 ; W64-O0-NEXT:    v_readlane_b32 s11, v1, 7
 ; W64-O0-NEXT:    v_readlane_b32 s6, v1, 1
-; W64-O0-NEXT:    s_nop 4
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
@@ -1141,13 +1145,13 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_waitcnt vmcnt(2)
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    v_readlane_b32 s6, v0, 2
 ; W64-O0-NEXT:    v_readlane_b32 s7, v0, 3
 ; W64-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; W64-O0-NEXT:    v_readlane_b32 s4, v0, 1
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b32 s5, 0x3ff
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    v_and_b32_e64 v2, v2, s5
@@ -1214,7 +1218,6 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.6: ; in Loop: Header=BB2_5 Depth=1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -1226,14 +1229,15 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
 ; W64-O0-NEXT:    v_readlane_b32 s10, v1, 17
 ; W64-O0-NEXT:    v_readlane_b32 s11, v1, 18
 ; W64-O0-NEXT:    v_readlane_b32 s6, v1, 12
-; W64-O0-NEXT:    s_nop 4
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_xor_b64 exec, exec, s[4:5]
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB2_5
 ; W64-O0-NEXT:  ; %bb.7:
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -1241,18 +1245,20 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
 ; W64-O0-NEXT:    v_readlane_b32 s4, v1, 13
 ; W64-O0-NEXT:    v_readlane_b32 s5, v1, 14
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; W64-O0-NEXT:  .LBB2_8: ; %bb2
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_waitcnt vmcnt(3)
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    v_readlane_b32 s4, v0, 10
 ; W64-O0-NEXT:    v_readlane_b32 s5, v0, 11
 ; W64-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    global_store_dword v[1:2], v3, off
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index ac46f8ce20d60bf..72fbcc7bd9e40f5 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -224,7 +224,6 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 {
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -236,7 +235,9 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 {
 ; W64-O0-NEXT:    v_readlane_b32 s10, v1, 5
 ; W64-O0-NEXT:    v_readlane_b32 s11, v1, 6
 ; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT:    s_nop 4
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
@@ -246,11 +247,11 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 {
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_waitcnt vmcnt(1)
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    v_readlane_b32 s4, v1, 1
 ; W64-O0-NEXT:    v_readlane_b32 s5, v1, 2
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    ; kill: killed $vgpr1
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -640,7 +641,6 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -652,7 +652,9 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
 ; W64-O0-NEXT:    v_readlane_b32 s10, v1, 5
 ; W64-O0-NEXT:    v_readlane_b32 s11, v1, 6
 ; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT:    s_nop 4
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
@@ -707,7 +709,6 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.5: ; in Loop: Header=BB1_4 Depth=1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -719,7 +720,9 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
 ; W64-O0-NEXT:    v_readlane_b32 s10, v1, 13
 ; W64-O0-NEXT:    v_readlane_b32 s11, v1, 14
 ; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT:    s_nop 4
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
@@ -729,16 +732,16 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    v_readlane_b32 s4, v0, 9
+; W64-O0-NEXT:    v_readlane_b32 s5, v0, 10
+; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_waitcnt vmcnt(6)
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 9
-; W64-O0-NEXT:    v_readlane_b32 s5, v0, 10
-; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    global_store_dword v[4:5], v6, off
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -1175,7 +1178,6 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB2_1 Depth=1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -1187,7 +1189,9 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0-NEXT:    v_readlane_b32 s10, v1, 6
 ; W64-O0-NEXT:    v_readlane_b32 s11, v1, 7
 ; W64-O0-NEXT:    v_readlane_b32 s6, v1, 1
-; W64-O0-NEXT:    s_nop 4
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
@@ -1197,13 +1201,13 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_waitcnt vmcnt(2)
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    v_readlane_b32 s6, v0, 2
 ; W64-O0-NEXT:    v_readlane_b32 s7, v0, 3
 ; W64-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; W64-O0-NEXT:    v_readlane_b32 s4, v0, 1
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b32 s5, 0x3ff
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    v_and_b32_e64 v2, v2, s5
@@ -1222,12 +1226,12 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    v_readlane_b32 s4, v0, 0
 ; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_waitcnt vmcnt(4)
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 0
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    v_mov_b32_e32 v7, v5
 ; W64-O0-NEXT:    v_mov_b32_e32 v1, v4
@@ -1291,7 +1295,6 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.6: ; in Loop: Header=BB2_5 Depth=1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -1303,14 +1306,15 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0-NEXT:    v_readlane_b32 s10, v1, 17
 ; W64-O0-NEXT:    v_readlane_b32 s11, v1, 18
 ; W64-O0-NEXT:    v_readlane_b32 s6, v1, 12
-; W64-O0-NEXT:    s_nop 4
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
+; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_xor_b64 exec, exec, s[4:5]
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB2_5
 ; W64-O0-NEXT:  ; %bb.7:
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
@@ -1318,18 +1322,20 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0-NEXT:    v_readlane_b32 s4, v1, 13
 ; W64-O0-NEXT:    v_readlane_b32 s5, v1, 14
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; W64-O0-NEXT:  .LBB2_8: ; %bb2
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_waitcnt vmcnt(3)
+; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    v_readlane_b32 s4, v0, 10
 ; W64-O0-NEXT:    v_readlane_b32 s5, v0, 11
 ; W64-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    global_store_dword v[1:2], v3, off
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index 454dc881f7bf2cd..a4948921ed4270d 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -223,15 +223,12 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
 ; GCN-NEXT:    s_cbranch_scc1 .LBB0_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v2, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
 ; GCN-NEXT:    buffer_load_dword v1, off, s[92:95], 0 offset:12 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v2, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_waitcnt vmcnt(1)
 ; GCN-NEXT:    v_readlane_b32 s8, v2, 56
 ; GCN-NEXT:    v_readlane_b32 s9, v2, 57
 ; GCN-NEXT:    v_readlane_b32 s10, v2, 58
@@ -296,6 +293,7 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
 ; GCN-NEXT:    v_readlane_b32 s73, v2, 5
 ; GCN-NEXT:    v_readlane_b32 s74, v2, 6
 ; GCN-NEXT:    v_readlane_b32 s75, v2, 7
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readlane_b32 s76, v1, 56
 ; GCN-NEXT:    v_readlane_b32 s77, v1, 57
 ; GCN-NEXT:    v_readlane_b32 s78, v1, 58
@@ -320,6 +318,9 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
 ; GCN-NEXT:    v_readlane_b32 s5, v1, 5
 ; GCN-NEXT:    v_readlane_b32 s6, v1, 6
 ; GCN-NEXT:    v_readlane_b32 s7, v1, 7
+; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GCN-NEXT:    buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:7]
 ; GCN-NEXT:    ;;#ASMEND
@@ -378,6 +379,7 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:7]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readlane_b32 s0, v0, 0
 ; GCN-NEXT:    v_readlane_b32 s1, v0, 1
 ; GCN-NEXT:    v_readlane_b32 s2, v0, 2
@@ -595,12 +597,12 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    s_cbranch_scc1 .LBB1_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
 ; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[28:29]
 ; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[28:29]
-; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_waitcnt vmcnt(1)
 ; GCN-NEXT:    v_readlane_b32 s16, v1, 8
 ; GCN-NEXT:    v_readlane_b32 s17, v1, 9
 ; GCN-NEXT:    v_readlane_b32 s20, v1, 0
@@ -611,6 +613,7 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    v_readlane_b32 s25, v1, 5
 ; GCN-NEXT:    v_readlane_b32 s26, v1, 6
 ; GCN-NEXT:    v_readlane_b32 s27, v1, 7
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readlane_b32 s36, v0, 32
 ; GCN-NEXT:    v_readlane_b32 s37, v0, 33
 ; GCN-NEXT:    v_readlane_b32 s38, v0, 34
@@ -854,9 +857,6 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    s_cbranch_scc1 .LBB2_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
 ; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -908,6 +908,9 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    v_readlane_b32 s29, v1, 13
 ; GCN-NEXT:    v_readlane_b32 s30, v1, 14
 ; GCN-NEXT:    v_readlane_b32 s31, v1, 15
+; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[16:31]
 ; GCN-NEXT:    ;;#ASMEND
@@ -930,6 +933,7 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    v_readlane_b32 s17, v1, 61
 ; GCN-NEXT:    v_readlane_b32 s18, v1, 62
 ; GCN-NEXT:    v_readlane_b32 s19, v1, 63
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readlane_b32 s0, v0, 0
 ; GCN-NEXT:    v_readlane_b32 s1, v0, 1
 ; GCN-NEXT:    ;;#ASMSTART
@@ -1105,9 +1109,6 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
 ; GCN-NEXT:    s_cbranch_scc1 .LBB3_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
 ; GCN-NEXT:    buffer_load_dword v2, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -1159,6 +1160,9 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
 ; GCN-NEXT:    v_readlane_b32 s29, v2, 13
 ; GCN-NEXT:    v_readlane_b32 s30, v2, 14
 ; GCN-NEXT:    v_readlane_b32 s31, v2, 15
+; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def v0
 ; GCN-NEXT:    ;;#ASMEND
@@ -1184,6 +1188,7 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
 ; GCN-NEXT:    v_readlane_b32 s17, v2, 61
 ; GCN-NEXT:    v_readlane_b32 s18, v2, 62
 ; GCN-NEXT:    v_readlane_b32 s19, v2, 63
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readlane_b32 s0, v1, 0
 ; GCN-NEXT:    v_readlane_b32 s1, v1, 1
 ; GCN-NEXT:    ;;#ASMSTART
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
index fbbcfd62f5c602d..fec732eff798c8c 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
@@ -123,9 +123,6 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
 ; GCN-NEXT:    s_cbranch_scc1 .LBB0_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
 ; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[24:25]
-; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
 ; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[24:25]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -145,6 +142,9 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
 ; GCN-NEXT:    v_readlane_b32 s17, v1, 13
 ; GCN-NEXT:    v_readlane_b32 s18, v1, 14
 ; GCN-NEXT:    v_readlane_b32 s19, v1, 15
+; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
+; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[24:25]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
@@ -202,6 +202,7 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
 ; GCN-NEXT:    v_readlane_b32 s21, v1, 61
 ; GCN-NEXT:    v_readlane_b32 s22, v1, 62
 ; GCN-NEXT:    v_readlane_b32 s23, v1, 63
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readlane_b32 s4, v0, 0
 ; GCN-NEXT:    v_readlane_b32 s5, v0, 1
 ; GCN-NEXT:    ;;#ASMSTART
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index 08db1e7fee259d6..033fd8ef89cfe06 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -10083,17 +10083,30 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    s_add_u32 s40, s40, s3
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; GFX6-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
+; GFX6-NEXT:    s_addc_u32 s41, s41, 0
 ; GFX6-NEXT:    v_mbcnt_hi_u32_b32_e32 v5, -1, v0
 ; GFX6-NEXT:    v_mov_b32_e32 v6, 0
+; GFX6-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_writelane_b32 v0, s0, 0
+; GFX6-NEXT:    v_writelane_b32 v0, s1, 1
+; GFX6-NEXT:    v_writelane_b32 v0, s2, 2
+; GFX6-NEXT:    v_writelane_b32 v0, s3, 3
+; GFX6-NEXT:    s_mov_b32 s8, 0x80400
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s8 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v0, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX6-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 8, v5
 ; GFX6-NEXT:    v_mov_b32_e32 v8, v6
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:240
-; GFX6-NEXT:    s_addc_u32 s41, s41, 0
-; GFX6-NEXT:    s_mov_b32 s2, 0x83800
+; GFX6-NEXT:    s_mov_b32 s2, 0x84400
 ; GFX6-NEXT:    s_mov_b64 s[8:9], exec
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
@@ -10103,7 +10116,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:224
-; GFX6-NEXT:    s_mov_b32 s2, 0x83400
+; GFX6-NEXT:    s_mov_b32 s2, 0x84000
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
@@ -10112,7 +10125,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:208
-; GFX6-NEXT:    s_mov_b32 s2, 0x83000
+; GFX6-NEXT:    s_mov_b32 s2, 0x83c00
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
@@ -10121,7 +10134,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:192
-; GFX6-NEXT:    s_mov_b32 s2, 0x82c00
+; GFX6-NEXT:    s_mov_b32 s2, 0x83800
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
@@ -10130,7 +10143,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:176
-; GFX6-NEXT:    s_mov_b32 s2, 0x82800
+; GFX6-NEXT:    s_mov_b32 s2, 0x83400
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
@@ -10139,7 +10152,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:160
-; GFX6-NEXT:    s_mov_b32 s2, 0x82400
+; GFX6-NEXT:    s_mov_b32 s2, 0x83000
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
@@ -10148,7 +10161,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:144
-; GFX6-NEXT:    s_mov_b32 s2, 0x82000
+; GFX6-NEXT:    s_mov_b32 s2, 0x82c00
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
@@ -10157,7 +10170,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:128
-; GFX6-NEXT:    s_mov_b32 s2, 0x81c00
+; GFX6-NEXT:    s_mov_b32 s2, 0x82800
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
@@ -10166,7 +10179,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:112
-; GFX6-NEXT:    s_mov_b32 s2, 0x81800
+; GFX6-NEXT:    s_mov_b32 s2, 0x82400
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
@@ -10175,7 +10188,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:96
-; GFX6-NEXT:    s_mov_b32 s2, 0x81400
+; GFX6-NEXT:    s_mov_b32 s2, 0x82000
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
@@ -10184,25 +10197,41 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:80
-; GFX6-NEXT:    s_mov_b32 s2, 0x81000
+; GFX6-NEXT:    s_mov_b32 s2, 0x81c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:64
+; GFX6-NEXT:    s_mov_b32 s2, 0x81400
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
 ; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
 ; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
-; GFX6-NEXT:    buffer_load_dwordx4 v[17:20], v[7:8], s[4:7], 0 addr64 offset:64
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64
 ; GFX6-NEXT:    buffer_load_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:16
-; GFX6-NEXT:    s_mov_b32 s2, 0x80800
+; GFX6-NEXT:    s_mov_b32 s2, 0x80c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v11, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v12, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:32
+; GFX6-NEXT:    s_mov_b32 s2, 0x81000
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
 ; GFX6-NEXT:    buffer_store_dword v11, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
 ; GFX6-NEXT:    buffer_store_dword v12, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
-; GFX6-NEXT:    buffer_load_dwordx4 v[13:16], v[7:8], s[4:7], 0 addr64 offset:32
 ; GFX6-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GFX6-NEXT:    s_mov_b64 exec, 15
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
@@ -10211,25 +10240,25 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    v_writelane_b32 v4, s1, 1
 ; GFX6-NEXT:    v_writelane_b32 v4, s2, 2
 ; GFX6-NEXT:    v_writelane_b32 v4, s3, 3
-; GFX6-NEXT:    s_mov_b32 s10, 0x80400
+; GFX6-NEXT:    s_mov_b32 s10, 0x80800
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s10 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[8:9]
 ; GFX6-NEXT:    buffer_load_dwordx4 v[7:10], v[7:8], s[4:7], 0 addr64 offset:48
-; GFX6-NEXT:    s_mov_b32 s2, 0x80c00
+; GFX6-NEXT:    s_mov_b32 s0, 0x81800
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 13, v0
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 16, v4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], s2 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v7, off, s[40:43], s0 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT:    buffer_store_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(3)
 ; GFX6-NEXT:    v_mov_b32_e32 v7, 1
-; GFX6-NEXT:    s_mov_b64 s[2:3], exec
+; GFX6-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX6-NEXT:    buffer_store_dword v7, v4, s[40:43], 0 offen
 ; GFX6-NEXT:    ;;#ASMSTART
 ; GFX6-NEXT:    ; def s[4:11]
@@ -10245,12 +10274,12 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    v_writelane_b32 v4, s9, 5
 ; GFX6-NEXT:    v_writelane_b32 v4, s10, 6
 ; GFX6-NEXT:    v_writelane_b32 v4, s11, 7
-; GFX6-NEXT:    s_mov_b32 s12, 0x83c00
-; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s12 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_mov_b32 s2, 0x84800
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[2:3]
+; GFX6-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX6-NEXT:    ;;#ASMSTART
 ; GFX6-NEXT:    ; def s[8:15]
@@ -10262,18 +10291,19 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    ; def s[24:31]
 ; GFX6-NEXT:    ;;#ASMEND
 ; GFX6-NEXT:    ;;#ASMSTART
-; GFX6-NEXT:    ; def s[4:7]
+; GFX6-NEXT:    ; def s[0:3]
 ; GFX6-NEXT:    ;;#ASMEND
 ; GFX6-NEXT:    ;;#ASMSTART
-; GFX6-NEXT:    ; def s[2:3]
+; GFX6-NEXT:    ; def s[4:5]
 ; GFX6-NEXT:    ;;#ASMEND
 ; GFX6-NEXT:    ;;#ASMSTART
 ; GFX6-NEXT:    ; def s33
 ; GFX6-NEXT:    ;;#ASMEND
-; GFX6-NEXT:    s_and_saveexec_b64 s[34:35], vcc
+; GFX6-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GFX6-NEXT:    s_mov_b64 vcc, s[6:7]
 ; GFX6-NEXT:    s_cbranch_execz .LBB1_2
 ; GFX6-NEXT:  ; %bb.1: ; %bb0
-; GFX6-NEXT:    s_mov_b64 s[36:37], exec
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
@@ -10285,18 +10315,18 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    v_writelane_b32 v4, s13, 5
 ; GFX6-NEXT:    v_writelane_b32 v4, s14, 6
 ; GFX6-NEXT:    v_writelane_b32 v4, s15, 7
-; GFX6-NEXT:    s_mov_b32 s38, 0x84400
-; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s38 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_mov_b32 s34, 0x85000
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX6-NEXT:    s_mov_b64 s[36:37], exec
+; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
-; GFX6-NEXT:    s_mov_b32 s38, 0x83c00
+; GFX6-NEXT:    s_mov_b32 s34, 0x84800
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s38 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_readlane_b32 s8, v4, 0
 ; GFX6-NEXT:    v_readlane_b32 s9, v4, 1
@@ -10308,8 +10338,8 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    v_readlane_b32 s15, v4, 7
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX6-NEXT:    s_mov_b64 s[36:37], exec
+; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
@@ -10321,18 +10351,18 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    v_writelane_b32 v4, s21, 5
 ; GFX6-NEXT:    v_writelane_b32 v4, s22, 6
 ; GFX6-NEXT:    v_writelane_b32 v4, s23, 7
-; GFX6-NEXT:    s_mov_b32 s38, 0x84c00
-; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s38 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_mov_b32 s34, 0x85800
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX6-NEXT:    s_mov_b64 s[36:37], exec
+; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
-; GFX6-NEXT:    s_mov_b32 s38, 0x84400
+; GFX6-NEXT:    s_mov_b32 s34, 0x85000
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s38 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_readlane_b32 s16, v4, 0
 ; GFX6-NEXT:    v_readlane_b32 s17, v4, 1
@@ -10344,8 +10374,8 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    v_readlane_b32 s23, v4, 7
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX6-NEXT:    s_mov_b64 s[36:37], exec
+; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
@@ -10357,18 +10387,18 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    v_writelane_b32 v4, s29, 5
 ; GFX6-NEXT:    v_writelane_b32 v4, s30, 6
 ; GFX6-NEXT:    v_writelane_b32 v4, s31, 7
-; GFX6-NEXT:    s_mov_b32 s38, 0x85400
-; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s38 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_mov_b32 s34, 0x86000
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX6-NEXT:    s_mov_b64 s[36:37], exec
+; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
-; GFX6-NEXT:    s_mov_b32 s38, 0x84c00
+; GFX6-NEXT:    s_mov_b32 s34, 0x85800
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s38 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_readlane_b32 s24, v4, 0
 ; GFX6-NEXT:    v_readlane_b32 s25, v4, 1
@@ -10380,8 +10410,8 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    v_readlane_b32 s31, v4, 7
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX6-NEXT:    s_mov_b64 s[36:37], exec
+; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 15
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
@@ -10389,44 +10419,30 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    v_writelane_b32 v4, s1, 1
 ; GFX6-NEXT:    v_writelane_b32 v4, s2, 2
 ; GFX6-NEXT:    v_writelane_b32 v4, s3, 3
-; GFX6-NEXT:    s_mov_b32 s38, 0x85c00
-; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s38 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_mov_b32 s34, 0x86800
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
 ; GFX6-NEXT:    s_mov_b64 s[0:1], exec
-; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    s_mov_b64 exec, 3
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    v_writelane_b32 v4, s4, 0
 ; GFX6-NEXT:    v_writelane_b32 v4, s5, 1
-; GFX6-NEXT:    v_writelane_b32 v4, s6, 2
-; GFX6-NEXT:    v_writelane_b32 v4, s7, 3
-; GFX6-NEXT:    s_mov_b32 s36, 0x86000
-; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s36 ; 4-byte Folded Spill
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[0:1]
-; GFX6-NEXT:    s_mov_b64 s[0:1], exec
-; GFX6-NEXT:    s_mov_b64 exec, 3
-; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_writelane_b32 v4, s2, 0
-; GFX6-NEXT:    v_writelane_b32 v4, s3, 1
-; GFX6-NEXT:    s_mov_b32 s4, 0x86400
-; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s4 ; 4-byte Folded Spill
+; GFX6-NEXT:    s_mov_b32 s2, 0x86c00
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[0:1]
-; GFX6-NEXT:    s_mov_b64 s[36:37], exec
+; GFX6-NEXT:    s_mov_b64 s[34:35], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
-; GFX6-NEXT:    s_mov_b32 s38, 0x85400
+; GFX6-NEXT:    s_mov_b32 s36, 0x86000
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s38 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s36 ; 4-byte Folded Reload
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_readlane_b32 s0, v4, 0
 ; GFX6-NEXT:    v_readlane_b32 s1, v4, 1
@@ -10438,13 +10454,13 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    v_readlane_b32 s7, v4, 7
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX6-NEXT:    s_mov_b64 s[44:45], exec
+; GFX6-NEXT:    s_mov_b64 exec, s[34:35]
+; GFX6-NEXT:    s_mov_b64 s[34:35], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 15
-; GFX6-NEXT:    v_mov_b32_e32 v7, 0x2180
+; GFX6-NEXT:    s_mov_b32 s44, 0x86800
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s44 ; 4-byte Folded Reload
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_readlane_b32 s36, v4, 0
 ; GFX6-NEXT:    v_readlane_b32 s37, v4, 1
@@ -10452,11 +10468,10 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    v_readlane_b32 s39, v4, 3
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[44:45]
-; GFX6-NEXT:    s_mov_b64 vcc, s[34:35]
+; GFX6-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX6-NEXT:    s_mov_b64 s[44:45], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 3
-; GFX6-NEXT:    v_mov_b32_e32 v7, 0x2190
+; GFX6-NEXT:    v_mov_b32_e32 v7, 0x21b0
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload
@@ -10469,54 +10484,8 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    ;;#ASMSTART
 ; GFX6-NEXT:    ; use s[8:15],s[16:23],s[24:31],s[0:7],s[36:39],s[34:35]
 ; GFX6-NEXT:    ;;#ASMEND
-; GFX6-NEXT:    s_mov_b64 s[34:35], vcc
-; GFX6-NEXT:    s_mov_b64 s[4:5], exec
-; GFX6-NEXT:    s_mov_b64 exec, 15
-; GFX6-NEXT:    s_mov_b32 s6, 0x85c00
-; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s6 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    v_readlane_b32 s0, v4, 0
-; GFX6-NEXT:    v_readlane_b32 s1, v4, 1
-; GFX6-NEXT:    v_readlane_b32 s2, v4, 2
-; GFX6-NEXT:    v_readlane_b32 s3, v4, 3
-; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX6-NEXT:    s_mov_b32 s2, 0x83c00
-; GFX6-NEXT:    buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT:    buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT:    buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
-; GFX6-NEXT:    s_mov_b32 s2, 0x84400
-; GFX6-NEXT:    buffer_store_dword v13, off, s[40:43], s2 ; 4-byte Folded Spill
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v14, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT:    buffer_store_dword v15, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT:    buffer_store_dword v16, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
-; GFX6-NEXT:    s_waitcnt expcnt(4)
-; GFX6-NEXT:    v_mov_b32_e32 v0, v17
-; GFX6-NEXT:    v_mov_b32_e32 v1, v18
-; GFX6-NEXT:    v_mov_b32_e32 v2, v19
-; GFX6-NEXT:    v_mov_b32_e32 v3, v20
-; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    ;;#ASMSTART
 ; GFX6-NEXT:    ;;#ASMEND
-; GFX6-NEXT:    v_mov_b32_e32 v20, v3
-; GFX6-NEXT:    buffer_load_dword v13, off, s[40:43], s2 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v14, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v15, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v16, off, s[40:43], s2 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s2, 0x83c00
-; GFX6-NEXT:    v_mov_b32_e32 v19, v2
-; GFX6-NEXT:    v_mov_b32_e32 v18, v1
-; GFX6-NEXT:    v_mov_b32_e32 v17, v0
-; GFX6-NEXT:    buffer_load_dword v0, off, s[40:43], s2 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Reload
 ; GFX6-NEXT:    ;;#ASMSTART
 ; GFX6-NEXT:    ;;#ASMEND
 ; GFX6-NEXT:    ;;#ASMSTART
@@ -10530,128 +10499,158 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    ;;#ASMSTART
 ; GFX6-NEXT:    ;;#ASMEND
 ; GFX6-NEXT:  .LBB1_2: ; %ret
-; GFX6-NEXT:    s_or_b64 exec, exec, s[34:35]
-; GFX6-NEXT:    s_mov_b64 s[2:3], exec
+; GFX6-NEXT:    s_or_b64 exec, exec, vcc
+; GFX6-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 15
-; GFX6-NEXT:    s_mov_b32 s8, 0x80400
+; GFX6-NEXT:    s_mov_b32 s6, 0x80400
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s6 ; 4-byte Folded Reload
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    v_readlane_b32 s4, v4, 0
-; GFX6-NEXT:    v_readlane_b32 s5, v4, 1
-; GFX6-NEXT:    v_readlane_b32 s6, v4, 2
-; GFX6-NEXT:    v_readlane_b32 s7, v4, 3
+; GFX6-NEXT:    v_readlane_b32 s0, v4, 0
+; GFX6-NEXT:    v_readlane_b32 s1, v4, 1
+; GFX6-NEXT:    v_readlane_b32 s2, v4, 2
+; GFX6-NEXT:    v_readlane_b32 s3, v4, 3
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX6-NEXT:    s_mov_b32 s4, 0x83800
+; GFX6-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX6-NEXT:    s_mov_b64 s[36:37], s[0:1]
+; GFX6-NEXT:    s_mov_b64 s[4:5], exec
+; GFX6-NEXT:    s_mov_b64 exec, 15
+; GFX6-NEXT:    s_mov_b32 s6, 0x80800
+; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], s6 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_readlane_b32 s0, v4, 0
+; GFX6-NEXT:    v_readlane_b32 s1, v4, 1
+; GFX6-NEXT:    v_readlane_b32 s2, v4, 2
+; GFX6-NEXT:    v_readlane_b32 s3, v4, 3
+; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX6-NEXT:    s_mov_b32 s0, 0x84400
 ; GFX6-NEXT:    v_lshl_b64 v[4:5], v[5:6], 8
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b64 s[2:3], s[6:7]
-; GFX6-NEXT:    s_mov_b32 s4, 0x83400
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:240
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s4, 0x83000
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:224
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s4, 0x82c00
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:208
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s4, 0x82800
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:192
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s4, 0x82400
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:176
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s4, 0x82000
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:160
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s4, 0x81c00
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:144
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s4, 0x81800
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:128
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s4, 0x81400
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:112
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s4, 0x81000
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:96
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s4, 0x80c00
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:80
-; GFX6-NEXT:    buffer_store_dwordx4 v[17:20], v[4:5], s[0:3], 0 addr64 offset:64
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    buffer_load_dword v17, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v18, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v19, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v20, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_mov_b32 s4, 0x80800
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[17:20], v[4:5], s[0:3], 0 addr64 offset:48
-; GFX6-NEXT:    buffer_store_dwordx4 v[13:16], v[4:5], s[0:3], 0 addr64 offset:32
-; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:16
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b64 s[38:39], s[2:3]
+; GFX6-NEXT:    s_mov_b32 s0, 0x84000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:240
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x83c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:224
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x83800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:208
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x83400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:192
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x83000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:176
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x82c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:160
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x82800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:144
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x82400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:128
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x82000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:112
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x81c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:96
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x81400
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:80
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x81800
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:64
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x81000
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:48
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_mov_b32 s0, 0x80c00
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:32
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    buffer_load_dword v6, off, s[40:43], s0 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v7, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v8, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT:    buffer_load_dword v9, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[36:39], 0 addr64 offset:16
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[4:5], s[36:39], 0 addr64
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-FLATSCR-LABEL: test_limited_sgpr:
diff --git a/llvm/test/CodeGen/AMDGPU/swdev380865.ll b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
index 7201ffaf561662a..6c98c7def23289b 100644
--- a/llvm/test/CodeGen/AMDGPU/swdev380865.ll
+++ b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
@@ -45,9 +45,9 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
 ; CHECK-NEXT:    v_add_f64 v[0:1], v[0:1], 0
 ; CHECK-NEXT:    s_mov_b32 s2, 0
 ; CHECK-NEXT:    s_mov_b32 s3, 0x40140000
-; CHECK-NEXT:    v_writelane_b32 v2, s6, 9
-; CHECK-NEXT:    v_writelane_b32 v2, s7, 10
-; CHECK-NEXT:    v_writelane_b32 v2, s0, 11
+; CHECK-NEXT:    v_writelane_b32 v2, s0, 9
+; CHECK-NEXT:    v_writelane_b32 v2, s6, 10
+; CHECK-NEXT:    v_writelane_b32 v2, s7, 11
 ; CHECK-NEXT:    v_readlane_b32 s6, v2, 1
 ; CHECK-NEXT:    v_readlane_b32 s7, v2, 2
 ; CHECK-NEXT:    v_add_f64 v[0:1], v[0:1], s[2:3]
@@ -55,8 +55,8 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
 ; CHECK-NEXT:    s_mov_b32 s0, s2
 ; CHECK-NEXT:    v_writelane_b32 v2, s6, 1
 ; CHECK-NEXT:    v_writelane_b32 v2, s7, 2
-; CHECK-NEXT:    v_readlane_b32 s6, v2, 9
-; CHECK-NEXT:    v_readlane_b32 s7, v2, 10
+; CHECK-NEXT:    v_readlane_b32 s6, v2, 10
+; CHECK-NEXT:    v_readlane_b32 s7, v2, 11
 ; CHECK-NEXT:    s_mov_b32 s6, s2
 ; CHECK-NEXT:    v_add_f64 v[0:1], v[0:1], s[0:1]
 ; CHECK-NEXT:    v_readlane_b32 s0, v2, 3
@@ -92,11 +92,11 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
 ; CHECK-NEXT:    s_mov_b32 s4, s0
 ; CHECK-NEXT:    v_writelane_b32 v2, s1, 8
 ; CHECK-NEXT:    v_readlane_b32 s0, v2, 0
-; CHECK-NEXT:    v_readlane_b32 s2, v2, 11
+; CHECK-NEXT:    v_readlane_b32 s2, v2, 9
 ; CHECK-NEXT:    s_add_i32 s2, s2, s0
-; CHECK-NEXT:    v_writelane_b32 v2, s2, 11
+; CHECK-NEXT:    v_writelane_b32 v2, s2, 9
 ; CHECK-NEXT:    v_add_f64 v[0:1], v[0:1], s[4:5]
-; CHECK-NEXT:    v_readlane_b32 s0, v2, 11
+; CHECK-NEXT:    v_readlane_b32 s0, v2, 9
 ; CHECK-NEXT:    s_cmpk_lt_i32 s0, 0xa00
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB0_1
 ; CHECK-NEXT:  ; %bb.2: ; %for.cond.cleanup.loopexit
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
index c84eee4f9921d08..48e27f4480d7857 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
@@ -39,7 +39,6 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9()  {
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_2
 ; CHECK-NEXT:  ; %bb.1: ; %bb193
 ; CHECK-NEXT:  .LBB0_2: ; %bb194
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b64 exec, s[8:9]
@@ -47,7 +46,9 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9()  {
 ; CHECK-NEXT:    v_readlane_b32 s4, v1, 0
 ; CHECK-NEXT:    v_readlane_b32 s5, v1, 1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    v_cmp_ne_u16_e64 s[4:5], v0, s4
 ; CHECK-NEXT:    s_and_b64 vcc, exec, s[4:5]
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB0_4
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index b6e7da97e008905..d2340872eb545bb 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -223,9 +223,7 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[46:47], -1
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[46:47]
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(2)
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_readlane_b32 s36, v0, 4
 ; GFX9-O0-NEXT:    v_readlane_b32 s37, v0, 5
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[36:37]
@@ -233,6 +231,8 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
 ; GFX9-O0-NEXT:    v_readlane_b32 s39, v0, 1
 ; GFX9-O0-NEXT:    v_readlane_b32 s34, v0, 2
 ; GFX9-O0-NEXT:    v_readlane_b32 s35, v0, 3
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[36:37], v3, v4
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s[36:37]



More information about the llvm-commits mailing list