[llvm] Split vgpr regalloc pipeline (PR #93526)

Christudasan Devadasan via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 13 22:22:07 PDT 2024


https://github.com/cdevadas updated https://github.com/llvm/llvm-project/pull/93526

>From b1482db163894851a836d6f98e0b9d8c977ecc0e Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Mon, 27 May 2024 10:19:36 +0530
Subject: [PATCH 01/13] [CodeGen] change prototype of RegClassFilterFunc

Change the prototype of RegClassFilterFunc so that we can filter
not just by register class. This allows implementing more complex
filters based on other information associated with each register.

Patch provided by: Gang Chen (gangc at amd.com)
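
For illustration only (not part of this patch), a filter written against the
new prototype can consult MachineRegisterInfo for per-register state instead
of receiving only a register class. The predicate below is hypothetical and
merely shows the shape of such a callback:

  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/Register.h"
  #include "llvm/CodeGen/TargetRegisterInfo.h"

  using namespace llvm;

  // Hypothetical filter: restrict one allocation run to 32-bit registers.
  static bool onlyAllocate32BitRegs(const TargetRegisterInfo &TRI,
                                    const MachineRegisterInfo &MRI,
                                    const Register Reg) {
    // The register class is still reachable through MRI; any other
    // per-register query on MRI is now possible as well.
    const TargetRegisterClass *RC = MRI.getRegClass(Reg);
    return TRI.getRegSizeInBits(*RC) == 32;
  }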
---
 llvm/include/llvm/CodeGen/RegAllocCommon.h     |  9 ++++++---
 llvm/lib/CodeGen/RegAllocBase.cpp              |  3 +--
 llvm/lib/CodeGen/RegAllocFast.cpp              |  3 +--
 llvm/lib/CodeGen/RegAllocGreedy.cpp            |  6 +++---
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 13 ++++++++-----
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp   |  6 ++++--
 llvm/lib/Target/X86/X86TargetMachine.cpp       |  6 ++++--
 7 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/RegAllocCommon.h b/llvm/include/llvm/CodeGen/RegAllocCommon.h
index 757ca8e112eec..f3423083eef3a 100644
--- a/llvm/include/llvm/CodeGen/RegAllocCommon.h
+++ b/llvm/include/llvm/CodeGen/RegAllocCommon.h
@@ -10,22 +10,25 @@
 #define LLVM_CODEGEN_REGALLOCCOMMON_H
 
 #include <functional>
+#include <llvm/CodeGen/Register.h>
 
 namespace llvm {
 
 class TargetRegisterClass;
 class TargetRegisterInfo;
+class MachineRegisterInfo;
 
 typedef std::function<bool(const TargetRegisterInfo &TRI,
-                           const TargetRegisterClass &RC)> RegClassFilterFunc;
+                           const MachineRegisterInfo &MRI, const Register Reg)>
+    RegClassFilterFunc;
 
 /// Default register class filter function for register allocation. All virtual
 /// registers should be allocated.
 static inline bool allocateAllRegClasses(const TargetRegisterInfo &,
-                                         const TargetRegisterClass &) {
+                                         const MachineRegisterInfo &,
+                                         const Register) {
   return true;
 }
-
 }
 
 #endif // LLVM_CODEGEN_REGALLOCCOMMON_H
diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index d0dec372f6896..a4645ed93029d 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -181,8 +181,7 @@ void RegAllocBase::enqueue(const LiveInterval *LI) {
   if (VRM->hasPhys(Reg))
     return;
 
-  const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
-  if (ShouldAllocateClass(*TRI, RC)) {
+  if (ShouldAllocateClass(*TRI, *MRI, Reg)) {
     LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n');
     enqueueImpl(LI);
   } else {
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 6740e1f0edb4f..f6419daba6a2d 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -417,8 +417,7 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
 
 bool RegAllocFast::shouldAllocateRegister(const Register Reg) const {
   assert(Reg.isVirtual());
-  const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
-  return ShouldAllocateClass(*TRI, RC);
+  return ShouldAllocateClass(*TRI, *MRI, Reg);
 }
 
 void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 348277224c7ae..b3bf1899ceeaf 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -2306,9 +2306,9 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
     if (Reg.isPhysical())
       continue;
 
-    // This may be a skipped class
+    // This may be a skipped register.
     if (!VRM->hasPhys(Reg)) {
-      assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
+      assert(!ShouldAllocateClass(*TRI, *MRI, Reg) &&
              "We have an unallocated variable which should have been handled");
       continue;
     }
@@ -2698,7 +2698,7 @@ bool RAGreedy::hasVirtRegAlloc() {
     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
     if (!RC)
       continue;
-    if (ShouldAllocateClass(*TRI, *RC))
+    if (ShouldAllocateClass(*TRI, *MRI, Reg))
       return true;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dbbfe34a63863..8e6affee489f1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -83,16 +83,19 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
 };
 
 static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
-                              const TargetRegisterClass &RC) {
-  return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
+                              const MachineRegisterInfo &MRI,
+                              const Register Reg) {
+  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+  return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
 }
 
 static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
-                              const TargetRegisterClass &RC) {
-  return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
+                              const MachineRegisterInfo &MRI,
+                              const Register Reg) {
+  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+  return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
 }
 
-
 /// -{sgpr|vgpr}-regalloc=... command line option.
 static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
 
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index d9f8222669cab..bf0e7f11aa123 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -278,8 +278,10 @@ class RVVRegisterRegAlloc : public RegisterRegAllocBase<RVVRegisterRegAlloc> {
 };
 
 static bool onlyAllocateRVVReg(const TargetRegisterInfo &TRI,
-                               const TargetRegisterClass &RC) {
-  return RISCVRegisterInfo::isRVVRegClass(&RC);
+                               const MachineRegisterInfo &MRI,
+                               const Register Reg) {
+  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+  return RISCVRegisterInfo::isRVVRegClass(RC);
 }
 
 static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 86b456019c4e5..eab537e8a5f8b 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -652,8 +652,10 @@ std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
 }
 
 static bool onlyAllocateTileRegisters(const TargetRegisterInfo &TRI,
-                                      const TargetRegisterClass &RC) {
-  return static_cast<const X86RegisterInfo &>(TRI).isTileRegisterClass(&RC);
+                                      const MachineRegisterInfo &MRI,
+                                      const Register Reg) {
+  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+  return static_cast<const X86RegisterInfo &>(TRI).isTileRegisterClass(RC);
 }
 
 bool X86PassConfig::addRegAssignAndRewriteOptimized() {

>From c5f22e0a1a27d364631e6d7f48241033c3b5e6d9 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Tue, 28 May 2024 12:19:54 +0530
Subject: [PATCH 02/13] [AMDGPU] Split VGPR regalloc pipeline

Allocating wwm-registers and regular VGPR operands
together imposes many challenges in the way the
registers are reused during allocation. At times,
regalloc reuses the registers of regular VGPR
operations for wwm-operations within a small range,
unintentionally clobbering their inactive lanes and
causing correctness issues that are hard to trace.

This patch splits the VGPR allocation pipeline further
to allocate wwm-registers first, with the regular VGPR
operands allocated afterwards in a separate pipeline.
The split ensures that the physical registers used for
wwm allocations won't take part in the subsequent
allocation pipeline, avoiding any such clobbering.
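
Condensed sketch of the resulting optimized register-assignment order,
mirroring the GCNPassConfig hunks in this patch (comments added here for
explanation; not a verbatim copy of the final code):

  // Equivalent of PEI for SGPRs.
  addPass(&SILowerSGPRSpillsID);
  // Pre-allocate wwm registers used in whole quad mode operations.
  addPass(&SIPreAllocateWWMRegsID);
  // New: allocate the remaining wwm-registers first ...
  addPass(createWWMRegAllocPass(true));
  addPass(&SILowerWWMCopiesID);
  addPass(createVirtRegRewriter(false));
  // ... and pin them so the next stage cannot reuse them ...
  addPass(&AMDGPUReserveWWMRegsID);
  // ... before the per-thread VGPRs are allocated.
  addPass(createVGPRAllocPass(true));

The new stage is also exposed on the command line: the -wwm-regalloc option
added here accepts the same basic/greedy/fast choices as the existing
-sgpr-regalloc and -vgpr-regalloc options, for example (allocator mix chosen
arbitrarily): llc -mtriple=amdgcn -sgpr-regalloc=greedy -wwm-regalloc=basic
-vgpr-regalloc=greedy.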
---
 .../llvm/CodeGen/MachineRegisterInfo.h        |    2 +
 llvm/lib/Target/AMDGPU/AMDGPU.h               |    4 +
 .../Target/AMDGPU/AMDGPUReserveWWMRegs.cpp    |  104 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   97 +-
 llvm/lib/Target/AMDGPU/CMakeLists.txt         |    1 +
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp    |   62 +-
 llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp  |  168 +-
 .../Target/AMDGPU/SIMachineFunctionInfo.cpp   |   29 +-
 .../lib/Target/AMDGPU/SIMachineFunctionInfo.h |   17 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |   68 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.h       |    5 +
 .../GlobalISel/image-waterfall-loop-O0.ll     |  219 +-
 .../GlobalISel/llvm.amdgcn.set.inactive.ll    |    2 +-
 .../AMDGPU/alloc-aligned-tuples-gfx908.mir    |    2 +-
 .../AMDGPU/alloc-aligned-tuples-gfx90a.mir    |    2 +-
 .../atomic_optimizations_pixelshader.ll       |   15 +-
 .../AMDGPU/bb-prolog-spill-during-regalloc.ll |   41 +-
 .../CodeGen/AMDGPU/cf-loop-on-constant.ll     |  184 +-
 llvm/test/CodeGen/AMDGPU/collapse-endcf.ll    |  663 ++-
 llvm/test/CodeGen/AMDGPU/div_i128.ll          | 3771 ++++++++---------
 .../AMDGPU/extend-wwm-virt-reg-liveness.mir   |   80 +-
 llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll |    2 +
 .../CodeGen/AMDGPU/fold-reload-into-exec.mir  |   40 +-
 .../CodeGen/AMDGPU/fold-reload-into-m0.mir    |   12 +-
 .../AMDGPU/gfx-callable-return-types.ll       |   88 +-
 .../AMDGPU/global_atomics_scan_fadd.ll        |   10 +-
 .../AMDGPU/global_atomics_scan_fmax.ll        |    6 +-
 .../AMDGPU/global_atomics_scan_fmin.ll        |    6 +-
 .../AMDGPU/global_atomics_scan_fsub.ll        |   10 +-
 .../greedy-instruction-split-subrange.mir     |   28 +-
 .../identical-subrange-spill-infloop.ll       |  522 ++-
 .../AMDGPU/illegal-eviction-assert.mir        |    2 +-
 .../kernel-vgpr-spill-mubuf-with-voffset.ll   |   37 +-
 llvm/test/CodeGen/AMDGPU/llc-pipeline.ll      |   22 +
 ...uf-legalize-operands-non-ptr-intrinsics.ll |  572 ++-
 .../CodeGen/AMDGPU/mubuf-legalize-operands.ll |  659 ++-
 ...al-regcopy-and-spill-missed-at-regalloc.ll |   35 +-
 .../AMDGPU/partial-sgpr-to-vgpr-spills.ll     |    2 +
 .../AMDGPU/pei-amdgpu-cs-chain-preserve.mir   |   40 +-
 .../CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir    |   22 +-
 llvm/test/CodeGen/AMDGPU/pr51516.mir          |    2 +-
 .../AMDGPU/preserve-only-inactive-lane.mir    |    6 -
 .../AMDGPU/preserve-wwm-copy-dst-reg.ll       |  714 ++--
 ...lloc-failure-overlapping-insert-assert.mir |    4 +-
 llvm/test/CodeGen/AMDGPU/rem_i128.ll          | 2798 ++++++------
 llvm/test/CodeGen/AMDGPU/remat-vop.mir        | 3090 +++++++-------
 .../scc-clobbered-sgpr-to-vmem-spill.ll       |    2 +
 .../CodeGen/AMDGPU/sgpr-regalloc-flags.ll     |   30 +-
 .../sgpr-spill-dead-frame-in-dbg-value.mir    |   17 +-
 ...sgpr-spill-incorrect-fi-bookkeeping-bug.ll |    2 +
 .../CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll     |    2 +
 .../AMDGPU/sgpr-spill-overlap-wwm-reserve.mir |  307 +-
 .../AMDGPU/sgpr-spills-empty-prolog-block.mir |    4 +-
 .../AMDGPU/sgpr-spills-split-regalloc.ll      |   62 +-
 .../CodeGen/AMDGPU/si-spill-sgpr-stack.ll     |    2 +
 .../AMDGPU/snippet-copy-bundle-regression.mir |    8 +-
 .../AMDGPU/spill-csr-frame-ptr-reg-copy.ll    |   14 +-
 .../AMDGPU/spill-empty-live-interval.mir      |   17 +-
 .../AMDGPU/spill-reg-tuple-super-reg-use.mir  |   22 +-
 .../AMDGPU/spill-sgpr-to-virtual-vgpr.mir     |   12 +-
 .../AMDGPU/spill-sgpr-used-for-exec-copy.mir  |   12 +-
 .../CodeGen/AMDGPU/spill-vector-superclass.ll |    6 +-
 .../spill-vgpr-to-agpr-update-regscavenger.ll |   37 +-
 llvm/test/CodeGen/AMDGPU/spill192.mir         |    2 +-
 llvm/test/CodeGen/AMDGPU/spill224.mir         |    2 +-
 llvm/test/CodeGen/AMDGPU/spill288.mir         |    2 +-
 llvm/test/CodeGen/AMDGPU/spill320.mir         |    2 +-
 llvm/test/CodeGen/AMDGPU/spill352.mir         |    2 +-
 llvm/test/CodeGen/AMDGPU/spill384.mir         |    2 +-
 .../CodeGen/AMDGPU/stacksave_stackrestore.ll  |  153 +-
 ...d-op-for-wwm-scratch-reg-spill-restore.mir |   16 +-
 llvm/test/CodeGen/AMDGPU/trap-abis.ll         |   44 +-
 .../true16-ra-pre-gfx11-regression-test.mir   |    2 +-
 .../AMDGPU/vgpr-spill-placement-issue61083.ll |   35 +-
 .../virtregrewrite-undef-identity-copy.mir    |   13 +-
 .../AMDGPU/whole-wave-register-copy.ll        |   23 +-
 .../AMDGPU/whole-wave-register-spill.ll       |   52 +-
 .../test/CodeGen/AMDGPU/wwm-reserved-spill.ll |  471 +-
 llvm/test/CodeGen/AMDGPU/wwm-reserved.ll      |  502 +--
 79 files changed, 8028 insertions(+), 8116 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp

diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 09d9a0b4ec402..01d91982ae1c7 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -184,6 +184,8 @@ class MachineRegisterInfo {
       TheDelegate->MRI_NoteCloneVirtualRegister(NewReg, SrcReg);
   }
 
+  const MachineFunction &getMF() const { return *MF; }
+
   //===--------------------------------------------------------------------===//
   // Function State
   //===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 6016bd5187d88..cd9f3fb162fd2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -56,6 +56,7 @@ ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
 FunctionPass *createAMDGPUCodeGenPreparePass();
 FunctionPass *createAMDGPULateCodeGenPreparePass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
+FunctionPass *createAMDGPUReserveWWMRegsPass();
 FunctionPass *createAMDGPURewriteOutArgumentsPass();
 ModulePass *
 createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
@@ -149,6 +150,9 @@ struct AMDGPULowerBufferFatPointersPass
   const TargetMachine &TM;
 };
 
+void initializeAMDGPUReserveWWMRegsPass(PassRegistry &);
+extern char &AMDGPUReserveWWMRegsID;
+
 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
 extern char &AMDGPURewriteOutArgumentsID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp
new file mode 100644
index 0000000000000..5ed8cd4231d00
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp
@@ -0,0 +1,104 @@
+//===-- AMDGPUReserveWWMRegs.cpp - Add WWM Regs to the reserved regs list -===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass should be invoked at the end of the wwm-regalloc pipeline.
+/// It identifies the WWM regs allocated during this pipeline and adds
+/// them to the list of reserved registers so that they won't be available for
+/// per-thread VGPR allocation in the subsequent regalloc pipeline.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-reserve-wwm-regs"
+
+namespace {
+
+class AMDGPUReserveWWMRegs : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AMDGPUReserveWWMRegs() : MachineFunctionPass(ID) {
+    initializeAMDGPUReserveWWMRegsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "AMDGPU Reserve WWM Registers";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(AMDGPUReserveWWMRegs, DEBUG_TYPE,
+                "AMDGPU Reserve WWM Registers", false, false)
+
+char AMDGPUReserveWWMRegs::ID = 0;
+
+char &llvm::AMDGPUReserveWWMRegsID = AMDGPUReserveWWMRegs::ID;
+
+bool AMDGPUReserveWWMRegs::runOnMachineFunction(MachineFunction &MF) {
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      unsigned Opc = MI.getOpcode();
+      if (Opc != AMDGPU::SI_SPILL_S32_TO_VGPR &&
+          Opc != AMDGPU::SI_RESTORE_S32_FROM_VGPR)
+        continue;
+
+      Register Reg = Opc == AMDGPU::SI_SPILL_S32_TO_VGPR
+                         ? MI.getOperand(0).getReg()
+                         : MI.getOperand(1).getReg();
+
+      assert(Reg.isPhysical() &&
+             "All WWM registers should have been allocated by now.");
+
+      MFI->reserveWWMRegister(Reg);
+      Changed |= true;
+    }
+  }
+
+  // Reset the renamable flag for MOs involving wwm-regs to avoid a MIR
+  // verifier error.
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      for (MachineOperand &MO : MI.operands()) {
+        if (!MO.isReg())
+          continue;
+
+        Register Reg = MO.getReg();
+        if (Reg.isPhysical() &&
+            llvm::is_contained(MFI->getWWMReservedRegs(), Reg))
+          MO.setIsRenamable(false);
+      }
+    }
+  }
+
+  // Now clear the NonWWMRegMask set earlier during wwm-regalloc.
+  MFI->clearNonWWMRegAllocMask();
+
+  return Changed;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 8e6affee489f1..e3375c758b8d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -82,6 +82,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
     : RegisterRegAllocBase(N, D, C) {}
 };
 
+class WWMRegisterRegAlloc : public RegisterRegAllocBase<WWMRegisterRegAlloc> {
+public:
+  WWMRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
+      : RegisterRegAllocBase(N, D, C) {}
+};
+
 static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
                               const MachineRegisterInfo &MRI,
                               const Register Reg) {
@@ -96,13 +102,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
   return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
 }
 
-/// -{sgpr|vgpr}-regalloc=... command line option.
+static bool onlyAllocateWWMRegs(const TargetRegisterInfo &TRI,
+                                const MachineRegisterInfo &MRI,
+                                const Register Reg) {
+  const SIMachineFunctionInfo *MFI =
+      MRI.getMF().getInfo<SIMachineFunctionInfo>();
+  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+  return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC) &&
+         MFI->checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
+}
+
+/// -{sgpr|wwm|vgpr}-regalloc=... command line option.
 static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
 
 /// A dummy default pass factory indicates whether the register allocator is
 /// overridden on the command line.
 static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
 static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
+static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
 
 static SGPRRegisterRegAlloc
 defaultSGPRRegAlloc("default",
@@ -119,6 +136,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
 VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
              cl::desc("Register allocator to use for VGPRs"));
 
+static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,
+               RegisterPassParser<WWMRegisterRegAlloc>>
+    WWMRegAlloc("wwm-regalloc", cl::Hidden,
+                cl::init(&useDefaultRegisterAllocator),
+                cl::desc("Register allocator to use for WWM registers"));
 
 static void initializeDefaultSGPRRegisterAllocatorOnce() {
   RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
@@ -138,6 +160,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
   }
 }
 
+static void initializeDefaultWWMRegisterAllocatorOnce() {
+  RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault();
+
+  if (!Ctor) {
+    Ctor = WWMRegAlloc;
+    WWMRegisterRegAlloc::setDefault(WWMRegAlloc);
+  }
+}
+
 static FunctionPass *createBasicSGPRRegisterAllocator() {
   return createBasicRegisterAllocator(onlyAllocateSGPRs);
 }
@@ -162,6 +193,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
   return createFastRegisterAllocator(onlyAllocateVGPRs, true);
 }
 
+static FunctionPass *createBasicWWMRegisterAllocator() {
+  return createBasicRegisterAllocator(onlyAllocateWWMRegs);
+}
+
+static FunctionPass *createGreedyWWMRegisterAllocator() {
+  return createGreedyRegisterAllocator(onlyAllocateWWMRegs);
+}
+
+static FunctionPass *createFastWWMRegisterAllocator() {
+  return createFastRegisterAllocator(onlyAllocateWWMRegs, false);
+}
+
 static SGPRRegisterRegAlloc basicRegAllocSGPR(
   "basic", "basic register allocator", createBasicSGPRRegisterAllocator);
 static SGPRRegisterRegAlloc greedyRegAllocSGPR(
@@ -178,7 +221,15 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
 
 static VGPRRegisterRegAlloc fastRegAllocVGPR(
   "fast", "fast register allocator", createFastVGPRRegisterAllocator);
-}
+static WWMRegisterRegAlloc basicRegAllocWWMReg("basic",
+                                               "basic register allocator",
+                                               createBasicWWMRegisterAllocator);
+static WWMRegisterRegAlloc
+    greedyRegAllocWWMReg("greedy", "greedy register allocator",
+                         createGreedyWWMRegisterAllocator);
+static WWMRegisterRegAlloc fastRegAllocWWMReg("fast", "fast register allocator",
+                                              createFastWWMRegisterAllocator);
+} // namespace
 
 static cl::opt<bool>
 EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
@@ -427,6 +478,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
   initializeAMDGPULowerModuleLDSLegacyPass(*PR);
   initializeAMDGPULowerBufferFatPointersPass(*PR);
+  initializeAMDGPUReserveWWMRegsPass(*PR);
   initializeAMDGPURewriteOutArgumentsPass(*PR);
   initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
   initializeAMDGPUUnifyMetadataPass(*PR);
@@ -926,6 +978,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
 
   FunctionPass *createSGPRAllocPass(bool Optimized);
   FunctionPass *createVGPRAllocPass(bool Optimized);
+  FunctionPass *createWWMRegAllocPass(bool Optimized);
   FunctionPass *createRegAllocPass(bool Optimized) override;
 
   bool addRegAssignAndRewriteFast() override;
@@ -1334,7 +1387,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
 }
 
 bool GCNPassConfig::addPreRewrite() {
-  addPass(&SILowerWWMCopiesID);
   if (EnableRegReassign)
     addPass(&GCNNSAReassignID);
   return true;
@@ -1370,12 +1422,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
   return createFastVGPRRegisterAllocator();
 }
 
+FunctionPass *GCNPassConfig::createWWMRegAllocPass(bool Optimized) {
+  // Initialize the global default.
+  llvm::call_once(InitializeDefaultWWMRegisterAllocatorFlag,
+                  initializeDefaultWWMRegisterAllocatorOnce);
+
+  RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault();
+  if (Ctor != useDefaultRegisterAllocator)
+    return Ctor();
+
+  if (Optimized)
+    return createGreedyWWMRegisterAllocator();
+
+  return createFastWWMRegisterAllocator();
+}
+
 FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
   llvm_unreachable("should not be used");
 }
 
 static const char RegAllocOptNotSupportedMessage[] =
-  "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
+    "-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
+    "and -vgpr-regalloc";
 
 bool GCNPassConfig::addRegAssignAndRewriteFast() {
   if (!usingDefaultRegAlloc())
@@ -1387,11 +1455,20 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
 
   // Equivalent of PEI for SGPRs.
   addPass(&SILowerSGPRSpillsID);
+
+  // To allocate wwm registers used in whole quad mode operations (for pixel
+  // shaders).
   addPass(&SIPreAllocateWWMRegsID);
 
-  addPass(createVGPRAllocPass(false));
+  // For allocating other wwm register operands.
+  addPass(createWWMRegAllocPass(false));
 
   addPass(&SILowerWWMCopiesID);
+  addPass(&AMDGPUReserveWWMRegsID);
+
+  // For allocating per-thread VGPRs.
+  addPass(createVGPRAllocPass(false));
+
   return true;
 }
 
@@ -1411,8 +1488,18 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
 
   // Equivalent of PEI for SGPRs.
   addPass(&SILowerSGPRSpillsID);
+
+  // To allocate wwm registers used in whole quad mode operations (for pixel
+  // shaders).
   addPass(&SIPreAllocateWWMRegsID);
 
+  // For allocating other whole wave mode registers.
+  addPass(createWWMRegAllocPass(true));
+  addPass(&SILowerWWMCopiesID);
+  addPass(createVirtRegRewriter(false));
+  addPass(&AMDGPUReserveWWMRegsID);
+
+  // For allocating per-thread VGPRs.
   addPass(createVGPRAllocPass(true));
 
   addPreRewrite();
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index c992352cb78da..178af07048571 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -94,6 +94,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPURegBankSelect.cpp
   AMDGPURegisterBankInfo.cpp
   AMDGPURemoveIncompatibleFunctions.cpp
+  AMDGPUReserveWWMRegs.cpp
   AMDGPUResourceUsageAnalysis.cpp
   AMDGPURewriteOutArguments.cpp
   AMDGPURewriteUndefForPHI.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index eae666ab0e7d7..fbb6c3d9fe24b 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1555,6 +1555,17 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
   }
 }
 
+// Mark all WWM VGPRs as BB LiveIns.
+static void addWwmRegBBLiveIn(MachineFunction &MF) {
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  for (MachineBasicBlock &MBB : MF) {
+    for (auto &Reg : MFI->getWWMReservedRegs())
+      MBB.addLiveIn(Reg);
+
+    MBB.sortUniqueLiveIns();
+  }
+}
+
 // Only report VGPRs to generic code.
 void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedVGPRs,
@@ -1567,11 +1578,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
     return;
 
-  MFI->shiftSpillPhysVGPRsToLowestRange(MF);
-
   TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
-  if (MFI->isEntryFunction())
-    return;
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -1581,19 +1588,9 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   MachineInstr *ReturnMI = nullptr;
   for (MachineBasicBlock &MBB : MF) {
     for (MachineInstr &MI : MBB) {
-      // WRITELANE instructions used for SGPR spills can overwrite the inactive
-      // lanes of VGPRs and callee must spill and restore them even if they are
-      // marked Caller-saved.
-
-      // TODO: Handle this elsewhere at an early point. Walking through all MBBs
-      // here would be a bad heuristic. A better way should be by calling
-      // allocateWWMSpill during the regalloc pipeline whenever a physical
-      // register is allocated for the intended virtual registers.
-      if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
-        MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
-      else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
-        MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
-      else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
+      // TODO: Walking through all MBBs here would be a bad heuristic. Better
+      // handle them elsewhere.
+      if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
         NeedExecCopyReservedReg = true;
       else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
                MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
@@ -1608,6 +1605,25 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
     }
   }
 
+  SmallVector<Register> SortedWWMVGPRs;
+  for (auto &Reg : MFI->getWWMReservedRegs()) {
+    // The shift-back is needed only for the VGPRs used for SGPR spills, and
+    // they are 32-bit in size. The SIPreAllocateWWMRegs pass can add tuples
+    // into the WWM reserved registers.
+    const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
+    if (TRI->getRegSizeInBits(*RC) > 32)
+      continue;
+    SortedWWMVGPRs.push_back(Reg);
+  }
+
+  sort(SortedWWMVGPRs, std::greater<Register>());
+  MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
+
+  if (MFI->isEntryFunction()) {
+    addWwmRegBBLiveIn(MF);
+    return;
+  }
+
   // Remove any VGPRs used in the return value because these do not need to be saved.
   // This prevents CSR restore from clobbering return VGPRs.
   if (ReturnMI) {
@@ -1617,6 +1633,10 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
     }
   }
 
+  // Create the stack objects for WWM registers now.
+  for (auto &Reg : MFI->getWWMReservedRegs())
+    MFI->allocateWWMSpill(MF, Reg);
+
   // Ignore the SGPRs the default implementation found.
   SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
 
@@ -1633,13 +1653,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   for (auto &Reg : MFI->getWWMSpills())
     SavedVGPRs.reset(Reg.first);
 
-  // Mark all lane VGPRs as BB LiveIns.
-  for (MachineBasicBlock &MBB : MF) {
-    for (auto &Reg : MFI->getWWMSpills())
-      MBB.addLiveIn(Reg.first);
-
-    MBB.sortUniqueLiveIns();
-  }
+  addWwmRegBBLiveIn(MF);
 }
 
 void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index b6a0152f6fa83..eb6a47e1a41d3 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -20,6 +20,7 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/InitializePasses.h"
@@ -32,12 +33,18 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
 
 namespace {
 
+static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation(
+    "amdgpu-num-vgprs-for-wwm-alloc",
+    cl::desc("Max num VGPRs for whole-wave register allocation."),
+    cl::ReallyHidden, cl::init(5));
+
 class SILowerSGPRSpills : public MachineFunctionPass {
 private:
   const SIRegisterInfo *TRI = nullptr;
   const SIInstrInfo *TII = nullptr;
   LiveIntervals *LIS = nullptr;
   SlotIndexes *Indexes = nullptr;
+  MachineDominatorTree *MDT = nullptr;
 
   // Save and Restore blocks of the current function. Typically there is a
   // single save block, unless Windows EH funclets are involved.
@@ -52,11 +59,15 @@ class SILowerSGPRSpills : public MachineFunctionPass {
   void calculateSaveRestoreBlocks(MachineFunction &MF);
   bool spillCalleeSavedRegs(MachineFunction &MF,
                             SmallVectorImpl<int> &CalleeSavedFIs);
-  void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS);
+  void updateLaneVGPRDomInstr(
+      int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
+      DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr);
 
+  void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask);
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineDominatorTree>();
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -77,6 +88,7 @@ INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
                       "SI lower SGPR spill instructions", false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
                     "SI lower SGPR spill instructions", false, false)
 
@@ -259,51 +271,86 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(
   return false;
 }
 
-void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
-                                                 LiveIntervals *LIS) {
-  // TODO: This is a workaround to avoid the unmodelled liveness computed with
-  // whole-wave virtual registers when allocated together with the regular VGPR
-  // virtual registers. Presently, the liveness computed during the regalloc is
-  // only uniform (or single lane aware) and it doesn't take account of the
-  // divergent control flow that exists for our GPUs. Since the WWM registers
-  // can modify inactive lanes, the wave-aware liveness should be computed for
-  // the virtual registers to accurately plot their interferences. Without
-  // having the divergent CFG for the function, it is difficult to implement the
-  // wave-aware liveness info. Until then, we conservatively extend the liveness
-  // of the wwm registers into the entire function so that they won't be reused
-  // without first spilling/splitting their liveranges.
-  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
-  // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks.
-  for (auto Reg : MFI->getSGPRSpillVGPRs()) {
-    for (MachineBasicBlock *SaveBlock : SaveBlocks) {
-      MachineBasicBlock::iterator InsertBefore = SaveBlock->begin();
-      DebugLoc DL = SaveBlock->findDebugLoc(InsertBefore);
-      auto MIB = BuildMI(*SaveBlock, InsertBefore, DL,
-                         TII->get(AMDGPU::IMPLICIT_DEF), Reg);
-      MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
-      // Set SGPR_SPILL asm printer flag
-      MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
-      if (LIS) {
-        LIS->InsertMachineInstrInMaps(*MIB);
+void SILowerSGPRSpills::updateLaneVGPRDomInstr(
+    int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
+    DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {
+  // For the Def of a virtual LaneVGPR to dominate all its uses, we should
+  // insert an IMPLICIT_DEF before the dominating spill. Switching to a
+  // depth-first order doesn't really help since the machine function can be in
+  // unstructured control flow post-SSA. Hence, for each virtual register, find
+  // the common dominator to get either the dominating spill or a block
+  // dominating all spills.
+  SIMachineFunctionInfo *FuncInfo =
+      MBB->getParent()->getInfo<SIMachineFunctionInfo>();
+  ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills =
+      FuncInfo->getSGPRSpillToVirtualVGPRLanes(FI);
+  Register PrevLaneVGPR;
+  for (auto &Spill : VGPRSpills) {
+    if (PrevLaneVGPR == Spill.VGPR)
+      continue;
+
+    PrevLaneVGPR = Spill.VGPR;
+    auto I = LaneVGPRDomInstr.find(Spill.VGPR);
+    if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {
+      // Initially record the spill instruction itself as the insertion point.
+      LaneVGPRDomInstr[Spill.VGPR] = InsertPt;
+    } else {
+      assert(I != LaneVGPRDomInstr.end());
+      auto PrevInsertPt = I->second;
+      MachineBasicBlock *DomMBB = PrevInsertPt->getParent();
+      if (DomMBB == MBB) {
+        // The insertion point was earlier selected in a predecessor block
+        // whose spills are currently being lowered. The earlier InsertPt would
+        // be the one just before the block terminator, and it should be
+        // changed if we insert any new spill in it.
+        if (MDT->dominates(&*InsertPt, &*PrevInsertPt))
+          I->second = InsertPt;
+
+        continue;
       }
+
+      // Find the common dominator block between PrevInsertPt and the
+      // current spill.
+      DomMBB = MDT->findNearestCommonDominator(DomMBB, MBB);
+      if (DomMBB == MBB)
+        I->second = InsertPt;
+      else if (DomMBB != PrevInsertPt->getParent())
+        I->second = &(*DomMBB->getFirstTerminator());
     }
   }
+}
 
-  // Insert the KILL in the return blocks to extend their liveness untill the
-  // end of function. Insert a separate KILL for each VGPR.
-  for (MachineBasicBlock *RestoreBlock : RestoreBlocks) {
-    MachineBasicBlock::iterator InsertBefore =
-        RestoreBlock->getFirstTerminator();
-    DebugLoc DL = RestoreBlock->findDebugLoc(InsertBefore);
-    for (auto Reg : MFI->getSGPRSpillVGPRs()) {
-      auto MIB = BuildMI(*RestoreBlock, InsertBefore, DL,
-                         TII->get(TargetOpcode::KILL));
-      MIB.addReg(Reg);
-      if (LIS)
-        LIS->InsertMachineInstrInMaps(*MIB);
+void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
+                                                      BitVector &RegMask) {
+  // Determine an optimal number of VGPRs for WWM allocation. The complement
+  // list will be available for allocating other VGPR virtual registers.
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  BitVector ReservedRegs = TRI->getReservedRegs(MF);
+  BitVector NonWwmAllocMask(TRI->getNumRegs());
+
+  // FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future
+  // to have a balanced allocation between WWM values and per-thread vector
+  // register operands.
+  unsigned NumRegs = MaxNumVGPRsForWwmAllocation;
+  NumRegs =
+      std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);
+
+  auto [MaxNumVGPRs, MaxNumAGPRs] = TRI->getMaxNumVectorRegs(MF);
+  // Try to use the highest available registers for now. Later after
+  // vgpr-regalloc, they can be shifted to the lowest range.
+  unsigned I = 0;
+  for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1;
+       (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) {
+    if (!ReservedRegs.test(Reg) &&
+        !MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/true)) {
+      TRI->markSuperRegs(RegMask, Reg);
+      ++I;
     }
   }
+
+  assert(I == NumRegs &&
+         "Failed to find enough VGPRs for whole-wave register allocation");
 }
 
 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
@@ -313,6 +360,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
 
   LIS = getAnalysisIfAvailable<LiveIntervals>();
   Indexes = getAnalysisIfAvailable<SlotIndexes>();
+  MDT = &getAnalysis<MachineDominatorTree>();
 
   assert(SaveBlocks.empty() && RestoreBlocks.empty());
 
@@ -349,6 +397,9 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
     // To track the spill frame indices handled in this pass.
     BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
 
+    // To track the IMPLICIT_DEF insertion point for the lane vgprs.
+    DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;
+
     for (MachineBasicBlock &MBB : MF) {
       for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
         if (!TII->isSGPRSpill(MI))
@@ -378,6 +429,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
                   "failed to spill SGPR to physical VGPR lane when allocated");
           }
         } else {
+          MachineInstrSpan MIS(&MI, &MBB);
           if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
             bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                 MI, FI, nullptr, Indexes, LIS);
@@ -385,21 +437,47 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
               llvm_unreachable(
                   "failed to spill SGPR to virtual VGPR lane when allocated");
             SpillFIs.set(FI);
+            updateLaneVGPRDomInstr(FI, &MBB, MIS.begin(), LaneVGPRDomInstr);
             SpilledToVirtVGPRLanes = true;
           }
         }
       }
     }
 
-    if (SpilledToVirtVGPRLanes) {
-      extendWWMVirtRegLiveness(MF, LIS);
+    for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
+      auto InsertPt = LaneVGPRDomInstr[Reg];
+      // Insert the IMPLICIT_DEF at the identified points.
+      MachineBasicBlock &Block = *InsertPt->getParent();
+      DebugLoc DL = Block.findDebugLoc(InsertPt);
+      auto MIB =
+          BuildMI(Block, *InsertPt, DL, TII->get(AMDGPU::IMPLICIT_DEF), Reg);
+
+      // Add WWM flag to the virtual register.
+      FuncInfo->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
+
+      // Set SGPR_SPILL asm printer flag
+      MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
       if (LIS) {
-        // Compute the LiveInterval for the newly created virtual registers.
-        for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
-          LIS->createAndComputeVirtRegInterval(Reg);
+        LIS->InsertMachineInstrInMaps(*MIB);
+        LIS->createAndComputeVirtRegInterval(Reg);
       }
     }
 
+    // Determine the registers for WWM allocation and also compute the register
+    // mask for non-wwm VGPR allocation.
+    if (FuncInfo->getSGPRSpillVGPRs().size()) {
+      BitVector WwmRegMask(TRI->getNumRegs());
+
+      determineRegsForWWMAllocation(MF, WwmRegMask);
+
+      BitVector NonWwmRegMask(WwmRegMask);
+      NonWwmRegMask.flip().clearBitsNotInMask(TRI->getAllVGPRRegMask());
+
+      // The complement set will be the registers for non-wwm (per-thread) vgpr
+      // allocation.
+      FuncInfo->updateNonWWMRegMask(NonWwmRegMask);
+    }
+
     for (MachineBasicBlock &MBB : MF) {
       // FIXME: The dead frame indices are replaced with a null register from
       // the debug value instructions. We should instead, update it with the
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 072c5aedc220b..a1415b94ae7c7 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -315,12 +315,13 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
   return false;
 }
 
-void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange(
-    MachineFunction &MF) {
+void SIMachineFunctionInfo::shiftWwmVGPRsToLowestRange(
+    MachineFunction &MF, SmallVectorImpl<Register> &WWMVGPRs,
+    BitVector &SavedVGPRs) {
   const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  for (unsigned I = 0, E = SpillPhysVGPRs.size(); I < E; ++I) {
-    Register Reg = SpillPhysVGPRs[I];
+  for (unsigned I = 0, E = WWMVGPRs.size(); I < E; ++I) {
+    Register Reg = WWMVGPRs[I];
     Register NewReg =
         TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
     if (!NewReg || NewReg >= Reg)
@@ -329,11 +330,21 @@ void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange(
     MRI.replaceRegWith(Reg, NewReg);
 
     // Update various tables with the new VGPR.
-    SpillPhysVGPRs[I] = NewReg;
+    WWMVGPRs[I] = NewReg;
     WWMReservedRegs.remove(Reg);
     WWMReservedRegs.insert(NewReg);
-    WWMSpills.insert(std::make_pair(NewReg, WWMSpills[Reg]));
-    WWMSpills.erase(Reg);
+
+    // Replace the register in SpillPhysVGPRs. This is needed to look for free
+    // lanes while spilling special SGPRs like FP, BP, etc. during PEI.
+    auto RegItr = std::find(SpillPhysVGPRs.begin(), SpillPhysVGPRs.end(), Reg);
+    if (RegItr != SpillPhysVGPRs.end()) {
+      unsigned Idx = std::distance(SpillPhysVGPRs.begin(), RegItr);
+      SpillPhysVGPRs[Idx] = NewReg;
+    }
+
+    // The generic `determineCalleeSaves` might have set the old register if it
+    // is in the CSR range.
+    SavedVGPRs.reset(Reg);
 
     for (MachineBasicBlock &MBB : MF) {
       MBB.removeLiveIn(Reg);
@@ -377,7 +388,9 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
       return false;
     }
 
-    allocateWWMSpill(MF, LaneVGPR);
+    if (IsPrologEpilog)
+      allocateWWMSpill(MF, LaneVGPR);
+
     reserveWWMRegister(LaneVGPR);
     for (MachineBasicBlock &MBB : MF) {
       MBB.addLiveIn(LaneVGPR);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 8c014832f5e46..22d6a99aa11f5 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -515,6 +515,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   // the VGPR and its stack slot index.
   WWMSpillsMap WWMSpills;
 
+  // Before allocation, the VGPR registers are partitioned into two distinct
+  // sets, the first one for WWM-values and the second set for non-WWM values.
+  // The latter set should be reserved during WWM-regalloc.
+  BitVector NonWWMRegMask;
+
   using ReservedRegSet = SmallSetVector<Register, 8>;
   // To track the VGPRs reserved for WWM instructions. They get stack slots
   // later during PrologEpilogInserter and get added into the superset WWMSpills
@@ -581,6 +586,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
 
   void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
 
+  void updateNonWWMRegMask(BitVector &RegMask) { NonWWMRegMask = RegMask; }
+  BitVector getNonWWMRegMask() const { return NonWWMRegMask; }
+  void clearNonWWMRegAllocMask() { NonWWMRegMask.clear(); }
+
   SIModeRegisterDefaults getMode() const { return Mode; }
 
   ArrayRef<SIRegisterInfo::SpilledReg>
@@ -723,9 +732,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
       I->second.IsDead = true;
   }
 
-  // To bring the Physical VGPRs in the highest range allocated for CSR SGPR
-  // spilling into the lowest available range.
-  void shiftSpillPhysVGPRsToLowestRange(MachineFunction &MF);
+  // To bring the allocated WWM registers in \p WWMVGPRs to the lowest available
+  // range.
+  void shiftWwmVGPRsToLowestRange(MachineFunction &MF,
+                                  SmallVectorImpl<Register> &WWMVGPRs,
+                                  BitVector &SavedVGPRs);
 
   bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI,
                                    bool SpillToPhysVGPRLane = false,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index ddb5f71935685..b397c4f6ca589 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -561,6 +561,37 @@ MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
   return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
 }
 
+std::pair<unsigned, unsigned>
+SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
+  unsigned MaxNumAGPRs = MaxNumVGPRs;
+  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+  // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
+  // a wave may have up to 512 total vector registers combining together both
+  // VGPRs and AGPRs. Hence, in an entry function without calls and without
+  // AGPRs used within it, it is possible to use the whole vector register
+  // budget for VGPRs.
+  //
+  // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
+  //       register file accordingly.
+  if (ST.hasGFX90AInsts()) {
+    if (MFI->usesAGPRs(MF)) {
+      MaxNumVGPRs /= 2;
+      MaxNumAGPRs = MaxNumVGPRs;
+    } else {
+      if (MaxNumVGPRs > TotalNumVGPRs) {
+        MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
+        MaxNumVGPRs = TotalNumVGPRs;
+      } else
+        MaxNumAGPRs = 0;
+    }
+  }
+
+  return std::pair(MaxNumVGPRs, MaxNumAGPRs);
+}
+
 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   Reserved.set(AMDGPU::MODE);
@@ -668,30 +699,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 
   // Reserve VGPRs/AGPRs.
   //
-  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
-  unsigned MaxNumAGPRs = MaxNumVGPRs;
-  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
-
-  // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
-  // a wave may have up to 512 total vector registers combining together both
-  // VGPRs and AGPRs. Hence, in an entry function without calls and without
-  // AGPRs used within it, it is possible to use the whole vector register
-  // budget for VGPRs.
-  //
-  // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
-  //       register file accordingly.
-  if (ST.hasGFX90AInsts()) {
-    if (MFI->usesAGPRs(MF)) {
-      MaxNumVGPRs /= 2;
-      MaxNumAGPRs = MaxNumVGPRs;
-    } else {
-      if (MaxNumVGPRs > TotalNumVGPRs) {
-        MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
-        MaxNumVGPRs = TotalNumVGPRs;
-      } else
-        MaxNumAGPRs = 0;
-    }
-  }
+  auto [MaxNumVGPRs, MaxNumAGPRs] = getMaxNumVectorRegs(MF);
 
   for (const TargetRegisterClass *RC : regclasses()) {
     if (RC->isBaseClass() && isVGPRClass(RC)) {
@@ -724,6 +732,18 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
   }
 
+  // During wwm-regalloc, reserve the registers for per-lane VGPR allocation.
+  // The MFI->getNonWWMRegMask() field will have a valid bitmask only during
+  // wwm-regalloc and will be empty otherwise.
+  BitVector NonWWMRegMask = MFI->getNonWWMRegMask();
+  if (!NonWWMRegMask.empty()) {
+    for (unsigned RegI = AMDGPU::VGPR0, RegE = AMDGPU::VGPR0 + MaxNumVGPRs;
+         RegI < RegE; ++RegI) {
+      if (NonWWMRegMask.test(RegI))
+        reserveRegisterTuples(Reserved, RegI);
+    }
+  }
+
   for (Register Reg : MFI->getWWMReservedRegs())
     reserveRegisterTuples(Reserved, Reg);
 
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 88d5686720985..409e5418abc8e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -82,6 +82,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
   /// spilling is needed.
   MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
 
+  /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
+  /// of waves per execution unit required for the function \p MF.
+  std::pair<unsigned, unsigned>
+  getMaxNumVectorRegs(const MachineFunction &MF) const;
+
   BitVector getReservedRegs(const MachineFunction &MF) const override;
   bool isAsmClobberable(const MachineFunction &MF,
                         MCRegister PhysReg) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
index 10cbc56cc5fbe..c9426106af5da 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
@@ -12,97 +12,90 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
 ; CHECK:       ; %bb.0: ; %bb
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_xor_saveexec_b32 s4, -1
-; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s4
-; CHECK-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; CHECK-NEXT:    v_mov_b32_e32 v8, v0
-; CHECK-NEXT:    s_or_saveexec_b32 s21, -1
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; CHECK-NEXT:    s_mov_b32 exec_lo, s21
-; CHECK-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; CHECK-NEXT:    v_mov_b32_e32 v15, v1
-; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; CHECK-NEXT:    v_mov_b32_e32 v14, v2
-; CHECK-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; CHECK-NEXT:    v_mov_b32_e32 v13, v3
-; CHECK-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; CHECK-NEXT:    v_mov_b32_e32 v12, v4
-; CHECK-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; CHECK-NEXT:    v_mov_b32_e32 v11, v5
-; CHECK-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; CHECK-NEXT:    v_mov_b32_e32 v10, v6
-; CHECK-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; CHECK-NEXT:    v_mov_b32_e32 v9, v7
-; CHECK-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; CHECK-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 killed $exec
-; CHECK-NEXT:    v_mov_b32_e32 v2, v15
-; CHECK-NEXT:    v_mov_b32_e32 v3, v14
-; CHECK-NEXT:    v_mov_b32_e32 v4, v13
-; CHECK-NEXT:    v_mov_b32_e32 v5, v12
-; CHECK-NEXT:    v_mov_b32_e32 v6, v11
-; CHECK-NEXT:    v_mov_b32_e32 v7, v10
-; CHECK-NEXT:    v_mov_b32_e32 v8, v9
-; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; CHECK-NEXT:    v_mov_b32_e32 v14, v1
+; CHECK-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; CHECK-NEXT:    v_mov_b32_e32 v13, v2
+; CHECK-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; CHECK-NEXT:    v_mov_b32_e32 v12, v3
+; CHECK-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; CHECK-NEXT:    v_mov_b32_e32 v11, v4
+; CHECK-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; CHECK-NEXT:    v_mov_b32_e32 v10, v5
+; CHECK-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; CHECK-NEXT:    v_mov_b32_e32 v9, v6
+; CHECK-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; CHECK-NEXT:    v_mov_b32_e32 v8, v7
+; CHECK-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; CHECK-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v1, v14
+; CHECK-NEXT:    v_mov_b32_e32 v2, v13
+; CHECK-NEXT:    v_mov_b32_e32 v3, v12
+; CHECK-NEXT:    v_mov_b32_e32 v4, v11
+; CHECK-NEXT:    v_mov_b32_e32 v5, v10
+; CHECK-NEXT:    v_mov_b32_e32 v6, v9
+; CHECK-NEXT:    v_mov_b32_e32 v7, v8
+; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b32 s8, 0
 ; CHECK-NEXT:    s_mov_b32 s4, s8
 ; CHECK-NEXT:    s_mov_b32 s5, s8
 ; CHECK-NEXT:    s_mov_b32 s6, s8
 ; CHECK-NEXT:    s_mov_b32 s7, s8
-; CHECK-NEXT:    v_writelane_b32 v0, s4, 0
-; CHECK-NEXT:    v_writelane_b32 v0, s5, 1
-; CHECK-NEXT:    v_writelane_b32 v0, s6, 2
-; CHECK-NEXT:    v_writelane_b32 v0, s7, 3
+; CHECK-NEXT:    ; implicit-def: $vgpr16 : SGPR spill to VGPR lane
+; CHECK-NEXT:    v_writelane_b32 v16, s4, 0
+; CHECK-NEXT:    v_writelane_b32 v16, s5, 1
+; CHECK-NEXT:    v_writelane_b32 v16, s6, 2
+; CHECK-NEXT:    v_writelane_b32 v16, s7, 3
 ; CHECK-NEXT:    s_mov_b32 s6, 0
 ; CHECK-NEXT:    s_mov_b32 s4, s6
 ; CHECK-NEXT:    s_mov_b32 s5, s6
-; CHECK-NEXT:    v_mov_b32_e32 v1, s4
-; CHECK-NEXT:    v_mov_b32_e32 v2, s5
-; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    v_mov_b32_e32 v1, s5
+; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b32 s4, exec_lo
-; CHECK-NEXT:    v_writelane_b32 v0, s4, 4
+; CHECK-NEXT:    v_writelane_b32 v16, s4, 4
 ; CHECK-NEXT:    s_or_saveexec_b32 s21, -1
-; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s21
 ; CHECK-NEXT:  .LBB0_1: ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    s_or_saveexec_b32 s21, -1
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s21
-; CHECK-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    v_readfirstlane_b32 s12, v8
-; CHECK-NEXT:    v_readfirstlane_b32 s10, v7
-; CHECK-NEXT:    v_readfirstlane_b32 s9, v6
-; CHECK-NEXT:    v_readfirstlane_b32 s8, v5
-; CHECK-NEXT:    v_readfirstlane_b32 s7, v4
-; CHECK-NEXT:    v_readfirstlane_b32 s6, v3
-; CHECK-NEXT:    v_readfirstlane_b32 s5, v2
-; CHECK-NEXT:    v_readfirstlane_b32 s4, v1
+; CHECK-NEXT:    v_readfirstlane_b32 s12, v7
+; CHECK-NEXT:    v_readfirstlane_b32 s10, v6
+; CHECK-NEXT:    v_readfirstlane_b32 s9, v5
+; CHECK-NEXT:    v_readfirstlane_b32 s8, v4
+; CHECK-NEXT:    v_readfirstlane_b32 s7, v3
+; CHECK-NEXT:    v_readfirstlane_b32 s6, v2
+; CHECK-NEXT:    v_readfirstlane_b32 s5, v1
+; CHECK-NEXT:    v_readfirstlane_b32 s4, v0
 ; CHECK-NEXT:    ; kill: def $sgpr12 killed $sgpr12 def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
 ; CHECK-NEXT:    s_mov_b32 s13, s10
 ; CHECK-NEXT:    s_mov_b32 s14, s9
@@ -111,56 +104,56 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
 ; CHECK-NEXT:    s_mov_b32 s17, s6
 ; CHECK-NEXT:    s_mov_b32 s18, s5
 ; CHECK-NEXT:    s_mov_b32 s19, s4
-; CHECK-NEXT:    v_writelane_b32 v0, s12, 5
-; CHECK-NEXT:    v_writelane_b32 v0, s13, 6
-; CHECK-NEXT:    v_writelane_b32 v0, s14, 7
-; CHECK-NEXT:    v_writelane_b32 v0, s15, 8
-; CHECK-NEXT:    v_writelane_b32 v0, s16, 9
-; CHECK-NEXT:    v_writelane_b32 v0, s17, 10
-; CHECK-NEXT:    v_writelane_b32 v0, s18, 11
-; CHECK-NEXT:    v_writelane_b32 v0, s19, 12
+; CHECK-NEXT:    v_writelane_b32 v16, s12, 5
+; CHECK-NEXT:    v_writelane_b32 v16, s13, 6
+; CHECK-NEXT:    v_writelane_b32 v16, s14, 7
+; CHECK-NEXT:    v_writelane_b32 v16, s15, 8
+; CHECK-NEXT:    v_writelane_b32 v16, s16, 9
+; CHECK-NEXT:    v_writelane_b32 v16, s17, 10
+; CHECK-NEXT:    v_writelane_b32 v16, s18, 11
+; CHECK-NEXT:    v_writelane_b32 v16, s19, 12
+; CHECK-NEXT:    v_mov_b32_e32 v6, v8
 ; CHECK-NEXT:    v_mov_b32_e32 v7, v9
-; CHECK-NEXT:    v_mov_b32_e32 v8, v10
+; CHECK-NEXT:    v_mov_b32_e32 v4, v10
 ; CHECK-NEXT:    v_mov_b32_e32 v5, v11
-; CHECK-NEXT:    v_mov_b32_e32 v6, v12
+; CHECK-NEXT:    v_mov_b32_e32 v2, v12
 ; CHECK-NEXT:    v_mov_b32_e32 v3, v13
-; CHECK-NEXT:    v_mov_b32_e32 v4, v14
+; CHECK-NEXT:    v_mov_b32_e32 v0, v14
 ; CHECK-NEXT:    v_mov_b32_e32 v1, v15
-; CHECK-NEXT:    v_mov_b32_e32 v2, v16
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[12:13]
 ; CHECK-NEXT:    s_mov_b64 s[10:11], s[14:15]
 ; CHECK-NEXT:    s_mov_b64 s[8:9], s[16:17]
 ; CHECK-NEXT:    s_mov_b64 s[6:7], s[18:19]
-; CHECK-NEXT:    v_cmp_eq_u64_e64 s4, s[4:5], v[7:8]
-; CHECK-NEXT:    v_cmp_eq_u64_e64 s5, s[10:11], v[5:6]
+; CHECK-NEXT:    v_cmp_eq_u64_e64 s4, s[4:5], v[6:7]
+; CHECK-NEXT:    v_cmp_eq_u64_e64 s5, s[10:11], v[4:5]
 ; CHECK-NEXT:    s_and_b32 s4, s4, s5
-; CHECK-NEXT:    v_cmp_eq_u64_e64 s5, s[8:9], v[3:4]
+; CHECK-NEXT:    v_cmp_eq_u64_e64 s5, s[8:9], v[2:3]
 ; CHECK-NEXT:    s_and_b32 s4, s4, s5
-; CHECK-NEXT:    v_cmp_eq_u64_e64 s5, s[6:7], v[1:2]
+; CHECK-NEXT:    v_cmp_eq_u64_e64 s5, s[6:7], v[0:1]
 ; CHECK-NEXT:    s_and_b32 s4, s4, s5
 ; CHECK-NEXT:    s_and_saveexec_b32 s4, s4
-; CHECK-NEXT:    v_writelane_b32 v0, s4, 13
+; CHECK-NEXT:    v_writelane_b32 v16, s4, 13
 ; CHECK-NEXT:    s_or_saveexec_b32 s21, -1
-; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s21
 ; CHECK-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    s_or_saveexec_b32 s21, -1
-; CHECK-NEXT:    buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s21
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    v_readlane_b32 s4, v2, 13
-; CHECK-NEXT:    v_readlane_b32 s8, v2, 5
-; CHECK-NEXT:    v_readlane_b32 s9, v2, 6
-; CHECK-NEXT:    v_readlane_b32 s10, v2, 7
-; CHECK-NEXT:    v_readlane_b32 s11, v2, 8
-; CHECK-NEXT:    v_readlane_b32 s12, v2, 9
-; CHECK-NEXT:    v_readlane_b32 s13, v2, 10
-; CHECK-NEXT:    v_readlane_b32 s14, v2, 11
-; CHECK-NEXT:    v_readlane_b32 s15, v2, 12
-; CHECK-NEXT:    v_readlane_b32 s16, v2, 0
-; CHECK-NEXT:    v_readlane_b32 s17, v2, 1
-; CHECK-NEXT:    v_readlane_b32 s18, v2, 2
-; CHECK-NEXT:    v_readlane_b32 s19, v2, 3
+; CHECK-NEXT:    v_readlane_b32 s4, v16, 13
+; CHECK-NEXT:    v_readlane_b32 s8, v16, 5
+; CHECK-NEXT:    v_readlane_b32 s9, v16, 6
+; CHECK-NEXT:    v_readlane_b32 s10, v16, 7
+; CHECK-NEXT:    v_readlane_b32 s11, v16, 8
+; CHECK-NEXT:    v_readlane_b32 s12, v16, 9
+; CHECK-NEXT:    v_readlane_b32 s13, v16, 10
+; CHECK-NEXT:    v_readlane_b32 s14, v16, 11
+; CHECK-NEXT:    v_readlane_b32 s15, v16, 12
+; CHECK-NEXT:    v_readlane_b32 s16, v16, 0
+; CHECK-NEXT:    v_readlane_b32 s17, v16, 1
+; CHECK-NEXT:    v_readlane_b32 s18, v16, 2
+; CHECK-NEXT:    v_readlane_b32 s19, v16, 3
 ; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
@@ -171,25 +164,19 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
 ; CHECK-NEXT:    s_cbranch_execnz .LBB0_1
 ; CHECK-NEXT:  ; %bb.3:
 ; CHECK-NEXT:    s_or_saveexec_b32 s21, -1
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s21
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    v_readlane_b32 s4, v0, 4
+; CHECK-NEXT:    v_readlane_b32 s4, v16, 4
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s4
 ; CHECK-NEXT:  ; %bb.4:
-; CHECK-NEXT:    s_or_saveexec_b32 s21, -1
-; CHECK-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; CHECK-NEXT:    s_mov_b32 exec_lo, s21
 ; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
 ; CHECK-NEXT:    ; implicit-def: $sgpr4
 ; CHECK-NEXT:    v_mov_b32_e32 v1, s4
 ; CHECK-NEXT:    v_mov_b32_e32 v2, s4
 ; CHECK-NEXT:    v_mov_b32_e32 v3, s4
-; CHECK-NEXT:    ; kill: killed $vgpr4
 ; CHECK-NEXT:    s_xor_saveexec_b32 s4, -1
-; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b32 exec_lo, s4
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
index cbee039df7fd0..e5f3b773b2771 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
@@ -50,8 +50,8 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
 ; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_lg_u32 s2, 56
-; GCN-NEXT:    s_cselect_b32 s4, 1, 0
 ; GCN-NEXT:    v_mov_b32_e32 v0, s3
+; GCN-NEXT:    s_cselect_b32 s4, 1, 0
 ; GCN-NEXT:    s_not_b64 exec, exec
 ; GCN-NEXT:    v_mov_b32_e32 v0, 42
 ; GCN-NEXT:    s_not_b64 exec, exec
diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
index 80923dfc6f522..3c3c9839755a2 100644
--- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
+++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX908 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX908 %s
 
 ---
 # GCN-LABEL: name: alloc_vgpr_64
diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
index 0f0cd0e8171d1..c42b570b40812 100644
--- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
+++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX90A %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX90A %s
 # Using the unaligned vector tuples are OK as long as they aren't used
 # in a real instruction.
 
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
index 29704959fc176..e7627c1dca6df 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
@@ -276,8 +276,8 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX8-NEXT:    s_mov_b64 exec, s[10:11]
 ; GFX8-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX8-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
 ; GFX8-NEXT:    v_mov_b32_e32 v2, v0
+; GFX8-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
 ; GFX8-NEXT:    s_not_b64 exec, exec
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX8-NEXT:    s_not_b64 exec, exec
@@ -333,8 +333,8 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_mov_b64 exec, s[10:11]
 ; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
 ; GFX9-NEXT:    s_not_b64 exec, exec
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_not_b64 exec, exec
@@ -410,8 +410,8 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
 ; GFX1064-NEXT:    s_or_saveexec_b64 s[10:11], -1
 ; GFX1064-NEXT:    v_readlane_b32 s12, v1, 63
-; GFX1064-NEXT:    v_readlane_b32 s14, v1, 47
 ; GFX1064-NEXT:    v_writelane_b32 v3, s13, 32
+; GFX1064-NEXT:    v_readlane_b32 s14, v1, 47
 ; GFX1064-NEXT:    s_mov_b64 exec, s[10:11]
 ; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
 ; GFX1064-NEXT:    s_or_saveexec_b64 s[10:11], -1
@@ -463,8 +463,8 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1032-NEXT:    v_mov_b32_e32 v2, v1
 ; GFX1032-NEXT:    v_permlanex16_b32 v2, v2, -1, -1
 ; GFX1032-NEXT:    v_add_nc_u32_dpp v1, v2, v1 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
-; GFX1032-NEXT:    v_readlane_b32 s11, v1, 31
 ; GFX1032-NEXT:    v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
+; GFX1032-NEXT:    v_readlane_b32 s11, v1, 31
 ; GFX1032-NEXT:    v_readlane_b32 s10, v1, 15
 ; GFX1032-NEXT:    s_mov_b32 exec_lo, s9
 ; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -536,14 +536,15 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
 ; GFX1164-NEXT:    s_or_saveexec_b64 s[10:11], -1
 ; GFX1164-NEXT:    v_readlane_b32 s12, v1, 63
-; GFX1164-NEXT:    v_readlane_b32 s14, v1, 47
 ; GFX1164-NEXT:    v_writelane_b32 v3, s13, 32
+; GFX1164-NEXT:    v_readlane_b32 s14, v1, 47
 ; GFX1164-NEXT:    s_mov_b64 exec, s[10:11]
-; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
 ; GFX1164-NEXT:    s_or_saveexec_b64 s[10:11], -1
 ; GFX1164-NEXT:    v_writelane_b32 v3, s14, 48
 ; GFX1164-NEXT:    s_mov_b64 exec, s[10:11]
+; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX1164-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX1164-NEXT:    ; implicit-def: $vgpr0
 ; GFX1164-NEXT:    s_and_saveexec_b64 s[10:11], vcc
@@ -598,8 +599,8 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1132-NEXT:    v_permlanex16_b32 v2, v2, -1, -1
 ; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1132-NEXT:    v_add_nc_u32_dpp v1, v2, v1 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
-; GFX1132-NEXT:    v_readlane_b32 s11, v1, 31
 ; GFX1132-NEXT:    v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
+; GFX1132-NEXT:    v_readlane_b32 s11, v1, 31
 ; GFX1132-NEXT:    v_readlane_b32 s10, v1, 15
 ; GFX1132-NEXT:    s_mov_b32 exec_lo, s9
 ; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
diff --git a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
index 3ed2cb856eaea..9988b2fa1eaf0 100644
--- a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs --stop-after=regallocfast,1 -o - %s | FileCheck -check-prefix=REGALLOC %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs --stop-after=regallocfast,2 -o - %s | FileCheck -check-prefix=REGALLOC %s
 
 ; Test to check if the bb prolog spills are inserted correctly during regalloc.
 define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
@@ -8,22 +8,20 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
   ; REGALLOC-NEXT:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
   ; REGALLOC-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
   ; REGALLOC-NEXT: {{  $}}
-  ; REGALLOC-NEXT:   renamable $vgpr3 = IMPLICIT_DEF
   ; REGALLOC-NEXT:   SI_SPILL_V32_SAVE killed $vgpr2, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
   ; REGALLOC-NEXT:   SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-  ; REGALLOC-NEXT:   renamable $vgpr1 = COPY $vgpr0
-  ; REGALLOC-NEXT:   $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
   ; REGALLOC-NEXT:   renamable $sgpr4 = S_MOV_B32 49
-  ; REGALLOC-NEXT:   renamable $sgpr4_sgpr5 = V_CMP_GT_I32_e64 killed $vgpr1, killed $sgpr4, implicit $exec
+  ; REGALLOC-NEXT:   renamable $sgpr4_sgpr5 = V_CMP_GT_I32_e64 killed $vgpr0, killed $sgpr4, implicit $exec
   ; REGALLOC-NEXT:   renamable $sgpr6 = IMPLICIT_DEF
-  ; REGALLOC-NEXT:   renamable $vgpr1 = COPY killed renamable $sgpr6
-  ; REGALLOC-NEXT:   SI_SPILL_V32_SAVE killed $vgpr1, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; REGALLOC-NEXT:   renamable $vgpr0 = COPY killed renamable $sgpr6
+  ; REGALLOC-NEXT:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
   ; REGALLOC-NEXT:   renamable $sgpr6_sgpr7 = COPY $exec, implicit-def $exec
   ; REGALLOC-NEXT:   renamable $sgpr4_sgpr5 = S_AND_B64 renamable $sgpr6_sgpr7, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
   ; REGALLOC-NEXT:   renamable $sgpr6_sgpr7 = S_XOR_B64 renamable $sgpr4_sgpr5, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
-  ; REGALLOC-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 0, $vgpr0, implicit-def $sgpr6_sgpr7, implicit $sgpr6_sgpr7
-  ; REGALLOC-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 1, $vgpr0, implicit killed $sgpr6_sgpr7
-  ; REGALLOC-NEXT:   SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; REGALLOC-NEXT:   $vgpr63 = IMPLICIT_DEF
+  ; REGALLOC-NEXT:   $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 0, $vgpr63, implicit-def $sgpr6_sgpr7, implicit $sgpr6_sgpr7
+  ; REGALLOC-NEXT:   $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 1, $vgpr63, implicit killed $sgpr6_sgpr7
+  ; REGALLOC-NEXT:   SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
   ; REGALLOC-NEXT:   $exec = S_MOV_B64_term killed renamable $sgpr4_sgpr5
   ; REGALLOC-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
   ; REGALLOC-NEXT:   S_BRANCH %bb.3
@@ -31,16 +29,16 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
   ; REGALLOC-NEXT: bb.1.Flow:
   ; REGALLOC-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
   ; REGALLOC-NEXT: {{  $}}
-  ; REGALLOC-NEXT:   $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-  ; REGALLOC-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr4_sgpr5
-  ; REGALLOC-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+  ; REGALLOC-NEXT:   $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; REGALLOC-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0, implicit-def $sgpr4_sgpr5
+  ; REGALLOC-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 1
   ; REGALLOC-NEXT:   renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; REGALLOC-NEXT:   $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-  ; REGALLOC-NEXT:   SI_SPILL_V32_SAVE killed $vgpr1, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+  ; REGALLOC-NEXT:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; REGALLOC-NEXT:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
   ; REGALLOC-NEXT:   renamable $sgpr4_sgpr5 = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
-  ; REGALLOC-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
-  ; REGALLOC-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr5, 3, $vgpr0, implicit $sgpr4_sgpr5
-  ; REGALLOC-NEXT:   SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; REGALLOC-NEXT:   $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr63, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+  ; REGALLOC-NEXT:   $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr5, 3, $vgpr63, implicit $sgpr4_sgpr5
+  ; REGALLOC-NEXT:   SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
   ; REGALLOC-NEXT:   $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
   ; REGALLOC-NEXT:   S_CBRANCH_EXECZ %bb.4, implicit $exec
   ; REGALLOC-NEXT:   S_BRANCH %bb.2
@@ -64,13 +62,12 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
   ; REGALLOC-NEXT:   S_BRANCH %bb.1
   ; REGALLOC-NEXT: {{  $}}
   ; REGALLOC-NEXT: bb.4.bb.3:
-  ; REGALLOC-NEXT:   $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-  ; REGALLOC-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2, implicit-def $sgpr4_sgpr5
-  ; REGALLOC-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
+  ; REGALLOC-NEXT:   $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; REGALLOC-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 2, implicit-def $sgpr4_sgpr5
+  ; REGALLOC-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 3
   ; REGALLOC-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
   ; REGALLOC-NEXT:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
   ; REGALLOC-NEXT:   renamable $vgpr0 = V_LSHL_ADD_U32_e64 killed $vgpr0, 2, $vgpr0, implicit $exec
-  ; REGALLOC-NEXT:   KILL killed renamable $vgpr1
   ; REGALLOC-NEXT:   SI_RETURN implicit killed $vgpr0
 bb.0:
   %cmp = icmp slt i32 %arg0, 50
diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
index 3c8ea61b0d43b..2d3d7fafa72c0 100644
--- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
@@ -36,66 +36,56 @@ define amdgpu_kernel void @test_loop(ptr addrspace(3) %ptr, i32 %n) nounwind {
 ; GCN_DBG-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT:    s_add_u32 s12, s12, s9
 ; GCN_DBG-NEXT:    s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_load_dword s0, s[2:3], 0x9
+; GCN_DBG-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 0
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 0
 ; GCN_DBG-NEXT:    s_load_dword s1, s[2:3], 0xa
 ; GCN_DBG-NEXT:    s_mov_b32 s0, 0
 ; GCN_DBG-NEXT:    s_mov_b32 s2, -1
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN_DBG-NEXT:    s_cmp_lg_u32 s1, s2
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 1
 ; GCN_DBG-NEXT:    s_mov_b64 s[4:5], exec
 ; GCN_DBG-NEXT:    s_mov_b64 exec, -1
-; GCN_DBG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT:    buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_cbranch_scc1 .LBB0_2
 ; GCN_DBG-NEXT:  ; %bb.1: ; %for.exit
-; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    s_waitcnt expcnt(0)
-; GCN_DBG-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN_DBG-NEXT:    ; kill: killed $vgpr0
 ; GCN_DBG-NEXT:    s_endpgm
 ; GCN_DBG-NEXT:  .LBB0_2: ; %for.body
 ; GCN_DBG-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
 ; GCN_DBG-NEXT:    s_waitcnt expcnt(0)
-; GCN_DBG-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN_DBG-NEXT:    buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_waitcnt vmcnt(0)
-; GCN_DBG-NEXT:    v_readlane_b32 s0, v0, 1
-; GCN_DBG-NEXT:    v_readlane_b32 s2, v0, 0
+; GCN_DBG-NEXT:    v_readlane_b32 s0, v2, 1
+; GCN_DBG-NEXT:    v_readlane_b32 s2, v2, 0
 ; GCN_DBG-NEXT:    s_mov_b32 s1, 2
 ; GCN_DBG-NEXT:    s_lshl_b32 s1, s0, s1
 ; GCN_DBG-NEXT:    s_add_i32 s1, s1, s2
 ; GCN_DBG-NEXT:    s_mov_b32 s2, 0x80
 ; GCN_DBG-NEXT:    s_add_i32 s1, s1, s2
 ; GCN_DBG-NEXT:    s_mov_b32 m0, -1
-; GCN_DBG-NEXT:    v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT:    ds_read_b32 v1, v1
+; GCN_DBG-NEXT:    v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT:    ds_read_b32 v0, v0
 ; GCN_DBG-NEXT:    s_mov_b32 s2, 1.0
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT:    v_add_f32_e64 v2, v1, s2
+; GCN_DBG-NEXT:    v_add_f32_e64 v1, v0, s2
 ; GCN_DBG-NEXT:    s_mov_b32 m0, -1
-; GCN_DBG-NEXT:    v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT:    ds_write_b32 v1, v2
+; GCN_DBG-NEXT:    v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT:    ds_write_b32 v0, v1
 ; GCN_DBG-NEXT:    s_mov_b32 s1, 1
 ; GCN_DBG-NEXT:    s_add_i32 s0, s0, s1
 ; GCN_DBG-NEXT:    s_mov_b64 s[2:3], -1
 ; GCN_DBG-NEXT:    s_and_b64 vcc, exec, s[2:3]
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT:    buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_cbranch_vccnz .LBB0_2
 ; GCN_DBG-NEXT:  ; %bb.3: ; %DummyReturnBlock
-; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    s_waitcnt expcnt(0)
-; GCN_DBG-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN_DBG-NEXT:    ; kill: killed $vgpr0
 ; GCN_DBG-NEXT:    s_endpgm
 entry:
   %cmp = icmp eq i32 %n, -1
@@ -144,53 +134,48 @@ define amdgpu_kernel void @loop_const_true(ptr addrspace(3) %ptr, i32 %n) nounwi
 ; GCN_DBG-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT:    s_add_u32 s12, s12, s9
 ; GCN_DBG-NEXT:    s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_load_dword s0, s[2:3], 0x9
+; GCN_DBG-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 0
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 0
 ; GCN_DBG-NEXT:    s_mov_b32 s0, 0
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT:    buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_branch .LBB1_2
 ; GCN_DBG-NEXT:  .LBB1_1: ; %for.exit
-; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    s_waitcnt expcnt(0)
-; GCN_DBG-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN_DBG-NEXT:    ; kill: killed $vgpr0
 ; GCN_DBG-NEXT:    s_endpgm
 ; GCN_DBG-NEXT:  .LBB1_2: ; %for.body
 ; GCN_DBG-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
 ; GCN_DBG-NEXT:    s_waitcnt expcnt(0)
-; GCN_DBG-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN_DBG-NEXT:    buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_waitcnt vmcnt(0)
-; GCN_DBG-NEXT:    v_readlane_b32 s0, v0, 1
-; GCN_DBG-NEXT:    v_readlane_b32 s2, v0, 0
+; GCN_DBG-NEXT:    v_readlane_b32 s0, v2, 1
+; GCN_DBG-NEXT:    v_readlane_b32 s2, v2, 0
 ; GCN_DBG-NEXT:    s_mov_b32 s1, 2
 ; GCN_DBG-NEXT:    s_lshl_b32 s1, s0, s1
 ; GCN_DBG-NEXT:    s_add_i32 s1, s1, s2
 ; GCN_DBG-NEXT:    s_mov_b32 s2, 0x80
 ; GCN_DBG-NEXT:    s_add_i32 s1, s1, s2
 ; GCN_DBG-NEXT:    s_mov_b32 m0, -1
-; GCN_DBG-NEXT:    v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT:    ds_read_b32 v1, v1
+; GCN_DBG-NEXT:    v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT:    ds_read_b32 v0, v0
 ; GCN_DBG-NEXT:    s_mov_b32 s2, 1.0
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT:    v_add_f32_e64 v2, v1, s2
+; GCN_DBG-NEXT:    v_add_f32_e64 v1, v0, s2
 ; GCN_DBG-NEXT:    s_mov_b32 m0, -1
-; GCN_DBG-NEXT:    v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT:    ds_write_b32 v1, v2
+; GCN_DBG-NEXT:    v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT:    ds_write_b32 v0, v1
 ; GCN_DBG-NEXT:    s_mov_b32 s1, 1
 ; GCN_DBG-NEXT:    s_add_i32 s0, s0, s1
 ; GCN_DBG-NEXT:    s_mov_b64 s[2:3], 0
 ; GCN_DBG-NEXT:    s_and_b64 vcc, exec, s[2:3]
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT:    buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_cbranch_vccnz .LBB1_1
 ; GCN_DBG-NEXT:    s_branch .LBB1_2
@@ -232,53 +217,48 @@ define amdgpu_kernel void @loop_const_false(ptr addrspace(3) %ptr, i32 %n) nounw
 ; GCN_DBG-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT:    s_add_u32 s12, s12, s9
 ; GCN_DBG-NEXT:    s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_load_dword s0, s[2:3], 0x9
+; GCN_DBG-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 0
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 0
 ; GCN_DBG-NEXT:    s_mov_b32 s0, 0
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT:    buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_branch .LBB2_2
 ; GCN_DBG-NEXT:  .LBB2_1: ; %for.exit
-; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    s_waitcnt expcnt(0)
-; GCN_DBG-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN_DBG-NEXT:    ; kill: killed $vgpr0
 ; GCN_DBG-NEXT:    s_endpgm
 ; GCN_DBG-NEXT:  .LBB2_2: ; %for.body
 ; GCN_DBG-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
 ; GCN_DBG-NEXT:    s_waitcnt expcnt(0)
-; GCN_DBG-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN_DBG-NEXT:    buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_waitcnt vmcnt(0)
-; GCN_DBG-NEXT:    v_readlane_b32 s0, v0, 1
-; GCN_DBG-NEXT:    v_readlane_b32 s2, v0, 0
+; GCN_DBG-NEXT:    v_readlane_b32 s0, v2, 1
+; GCN_DBG-NEXT:    v_readlane_b32 s2, v2, 0
 ; GCN_DBG-NEXT:    s_mov_b32 s1, 2
 ; GCN_DBG-NEXT:    s_lshl_b32 s1, s0, s1
 ; GCN_DBG-NEXT:    s_add_i32 s1, s1, s2
 ; GCN_DBG-NEXT:    s_mov_b32 s2, 0x80
 ; GCN_DBG-NEXT:    s_add_i32 s1, s1, s2
 ; GCN_DBG-NEXT:    s_mov_b32 m0, -1
-; GCN_DBG-NEXT:    v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT:    ds_read_b32 v1, v1
+; GCN_DBG-NEXT:    v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT:    ds_read_b32 v0, v0
 ; GCN_DBG-NEXT:    s_mov_b32 s2, 1.0
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT:    v_add_f32_e64 v2, v1, s2
+; GCN_DBG-NEXT:    v_add_f32_e64 v1, v0, s2
 ; GCN_DBG-NEXT:    s_mov_b32 m0, -1
-; GCN_DBG-NEXT:    v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT:    ds_write_b32 v1, v2
+; GCN_DBG-NEXT:    v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT:    ds_write_b32 v0, v1
 ; GCN_DBG-NEXT:    s_mov_b32 s1, 1
 ; GCN_DBG-NEXT:    s_add_i32 s0, s0, s1
 ; GCN_DBG-NEXT:    s_mov_b64 s[2:3], -1
 ; GCN_DBG-NEXT:    s_and_b64 vcc, exec, s[2:3]
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT:    buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_cbranch_vccnz .LBB2_1
 ; GCN_DBG-NEXT:    s_branch .LBB2_2
@@ -321,51 +301,46 @@ define amdgpu_kernel void @loop_const_undef(ptr addrspace(3) %ptr, i32 %n) nounw
 ; GCN_DBG-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT:    s_add_u32 s12, s12, s9
 ; GCN_DBG-NEXT:    s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_load_dword s0, s[2:3], 0x9
+; GCN_DBG-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 0
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 0
 ; GCN_DBG-NEXT:    s_mov_b32 s0, 0
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT:    buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_branch .LBB3_2
 ; GCN_DBG-NEXT:  .LBB3_1: ; %for.exit
-; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    s_waitcnt expcnt(0)
-; GCN_DBG-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN_DBG-NEXT:    ; kill: killed $vgpr0
 ; GCN_DBG-NEXT:    s_endpgm
 ; GCN_DBG-NEXT:  .LBB3_2: ; %for.body
 ; GCN_DBG-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
 ; GCN_DBG-NEXT:    s_waitcnt expcnt(0)
-; GCN_DBG-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN_DBG-NEXT:    buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_waitcnt vmcnt(0)
-; GCN_DBG-NEXT:    v_readlane_b32 s0, v0, 1
-; GCN_DBG-NEXT:    v_readlane_b32 s2, v0, 0
+; GCN_DBG-NEXT:    v_readlane_b32 s0, v2, 1
+; GCN_DBG-NEXT:    v_readlane_b32 s2, v2, 0
 ; GCN_DBG-NEXT:    s_mov_b32 s1, 2
 ; GCN_DBG-NEXT:    s_lshl_b32 s1, s0, s1
 ; GCN_DBG-NEXT:    s_add_i32 s1, s1, s2
 ; GCN_DBG-NEXT:    s_mov_b32 s2, 0x80
 ; GCN_DBG-NEXT:    s_add_i32 s1, s1, s2
 ; GCN_DBG-NEXT:    s_mov_b32 m0, -1
-; GCN_DBG-NEXT:    v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT:    ds_read_b32 v1, v1
+; GCN_DBG-NEXT:    v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT:    ds_read_b32 v0, v0
 ; GCN_DBG-NEXT:    s_mov_b32 s2, 1.0
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT:    v_add_f32_e64 v2, v1, s2
+; GCN_DBG-NEXT:    v_add_f32_e64 v1, v0, s2
 ; GCN_DBG-NEXT:    s_mov_b32 m0, -1
-; GCN_DBG-NEXT:    v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT:    ds_write_b32 v1, v2
+; GCN_DBG-NEXT:    v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT:    ds_write_b32 v0, v1
 ; GCN_DBG-NEXT:    s_mov_b32 s1, 1
 ; GCN_DBG-NEXT:    s_add_i32 s0, s0, s1
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT:    buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN_DBG-NEXT:    s_cbranch_scc1 .LBB3_1
 ; GCN_DBG-NEXT:    s_branch .LBB3_2
@@ -422,66 +397,61 @@ define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
 ; GCN_DBG-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT:    s_add_u32 s12, s12, s9
 ; GCN_DBG-NEXT:    s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_load_dword s0, s[2:3], 0x9
+; GCN_DBG-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 0
-; GCN_DBG-NEXT:    v_mov_b32_e32 v1, 0
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 0
+; GCN_DBG-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN_DBG-NEXT:    s_mov_b32 m0, -1
-; GCN_DBG-NEXT:    ds_read_u8 v1, v1
+; GCN_DBG-NEXT:    ds_read_u8 v0, v0
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT:    v_readfirstlane_b32 s0, v1
+; GCN_DBG-NEXT:    v_readfirstlane_b32 s0, v0
 ; GCN_DBG-NEXT:    s_and_b32 s0, 1, s0
 ; GCN_DBG-NEXT:    s_cmp_eq_u32 s0, 1
 ; GCN_DBG-NEXT:    s_cselect_b64 s[0:1], -1, 0
 ; GCN_DBG-NEXT:    s_mov_b64 s[2:3], -1
 ; GCN_DBG-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 1
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s1, 2
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 1
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s1, 2
 ; GCN_DBG-NEXT:    s_mov_b32 s0, 0
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 3
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 3
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN_DBG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT:    buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN_DBG-NEXT:    s_branch .LBB4_2
 ; GCN_DBG-NEXT:  .LBB4_1: ; %for.exit
-; GCN_DBG-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN_DBG-NEXT:    s_waitcnt expcnt(0)
-; GCN_DBG-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN_DBG-NEXT:    ; kill: killed $vgpr0
 ; GCN_DBG-NEXT:    s_endpgm
 ; GCN_DBG-NEXT:  .LBB4_2: ; %for.body
 ; GCN_DBG-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN_DBG-NEXT:    s_waitcnt expcnt(0)
-; GCN_DBG-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN_DBG-NEXT:    buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN_DBG-NEXT:    s_waitcnt vmcnt(0)
-; GCN_DBG-NEXT:    v_readlane_b32 s0, v0, 3
-; GCN_DBG-NEXT:    v_readlane_b32 s2, v0, 1
-; GCN_DBG-NEXT:    v_readlane_b32 s3, v0, 2
-; GCN_DBG-NEXT:    v_readlane_b32 s4, v0, 0
+; GCN_DBG-NEXT:    v_readlane_b32 s0, v2, 3
+; GCN_DBG-NEXT:    v_readlane_b32 s2, v2, 1
+; GCN_DBG-NEXT:    v_readlane_b32 s3, v2, 2
+; GCN_DBG-NEXT:    v_readlane_b32 s4, v2, 0
 ; GCN_DBG-NEXT:    s_mov_b32 s1, 2
 ; GCN_DBG-NEXT:    s_lshl_b32 s1, s0, s1
 ; GCN_DBG-NEXT:    s_add_i32 s1, s1, s4
 ; GCN_DBG-NEXT:    s_mov_b32 s4, 0x80
 ; GCN_DBG-NEXT:    s_add_i32 s1, s1, s4
 ; GCN_DBG-NEXT:    s_mov_b32 m0, -1
-; GCN_DBG-NEXT:    v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT:    ds_read_b32 v1, v1
+; GCN_DBG-NEXT:    v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT:    ds_read_b32 v0, v0
 ; GCN_DBG-NEXT:    s_mov_b32 s4, 1.0
 ; GCN_DBG-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT:    v_add_f32_e64 v2, v1, s4
+; GCN_DBG-NEXT:    v_add_f32_e64 v1, v0, s4
 ; GCN_DBG-NEXT:    s_mov_b32 m0, -1
-; GCN_DBG-NEXT:    v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT:    ds_write_b32 v1, v2
+; GCN_DBG-NEXT:    v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT:    ds_write_b32 v0, v1
 ; GCN_DBG-NEXT:    s_mov_b32 s1, 1
 ; GCN_DBG-NEXT:    s_add_i32 s0, s0, s1
 ; GCN_DBG-NEXT:    s_and_b64 vcc, exec, s[2:3]
-; GCN_DBG-NEXT:    v_writelane_b32 v0, s0, 3
+; GCN_DBG-NEXT:    v_writelane_b32 v2, s0, 3
 ; GCN_DBG-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN_DBG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT:    buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN_DBG-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN_DBG-NEXT:    s_cbranch_vccnz .LBB4_1
 ; GCN_DBG-NEXT:    s_branch .LBB4_2
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 6bc8d29b3bf7c..c25cd19c992f1 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -48,24 +48,20 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT:    s_add_u32 s12, s12, s9
 ; GCN-O0-NEXT:    s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
-; GCN-O0-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 0
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 1
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT:    buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    ; implicit-def: $vgpr4 : SGPR spill to VGPR lane
+; GCN-O0-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-O0-NEXT:    v_writelane_b32 v4, s0, 0
+; GCN-O0-NEXT:    v_writelane_b32 v4, s1, 1
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT:    buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b32 s0, 1
-; GCN-O0-NEXT:    v_cmp_gt_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT:    v_cmp_gt_u32_e64 s[2:3], v0, s0
 ; GCN-O0-NEXT:    s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 2
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 3
+; GCN-O0-NEXT:    v_writelane_b32 v4, s0, 2
+; GCN-O0-NEXT:    v_writelane_b32 v4, s1, 3
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
@@ -73,12 +69,12 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:  ; %bb.1: ; %bb.outer.then
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s4, v0, 0
-; GCN-O0-NEXT:    v_readlane_b32 s5, v0, 1
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    v_readlane_b32 s4, v4, 0
+; GCN-O0-NEXT:    v_readlane_b32 s5, v4, 1
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s2, 0xf000
 ; GCN-O0-NEXT:    s_mov_b32 s0, 0
 ; GCN-O0-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
@@ -86,19 +82,19 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
 ; GCN-O0-NEXT:    s_mov_b64 s[6:7], s[0:1]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_ashrrev_i32_e64 v4, 31, v1
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT:    v_mov_b32_e32 v3, v4
+; GCN-O0-NEXT:    v_ashrrev_i32_e64 v3, 31, v0
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT:    v_mov_b32_e32 v2, v3
 ; GCN-O0-NEXT:    s_mov_b32 s0, 2
-; GCN-O0-NEXT:    v_lshl_b64 v[3:4], v[2:3], s0
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
-; GCN-O0-NEXT:    v_cmp_ne_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT:    v_lshl_b64 v[2:3], v[1:2], s0
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT:    buffer_store_dword v1, v[2:3], s[4:7], 0 addr64
+; GCN-O0-NEXT:    v_cmp_ne_u32_e64 s[2:3], v0, s0
 ; GCN-O0-NEXT:    s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 4
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 5
+; GCN-O0-NEXT:    v_writelane_b32 v4, s0, 4
+; GCN-O0-NEXT:    v_writelane_b32 v4, s1, 5
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
@@ -106,11 +102,11 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:  ; %bb.2: ; %bb.inner.then
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT:    v_readlane_b32 s0, v4, 0
+; GCN-O0-NEXT:    v_readlane_b32 s1, v4, 1
 ; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 1
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -130,26 +126,25 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:  .LBB0_3: ; %Flow
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 4
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 5
+; GCN-O0-NEXT:    v_readlane_b32 s0, v4, 4
+; GCN-O0-NEXT:    v_readlane_b32 s1, v4, 5
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GCN-O0-NEXT:  .LBB0_4: ; %bb.outer.end
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 2
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 3
+; GCN-O0-NEXT:    v_readlane_b32 s0, v4, 2
+; GCN-O0-NEXT:    v_readlane_b32 s1, v4, 3
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 3
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 3
+; GCN-O0-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-O0-NEXT:    s_mov_b32 m0, -1
-; GCN-O0-NEXT:    ds_write_b32 v1, v2
-; GCN-O0-NEXT:    ; kill: killed $vgpr0
+; GCN-O0-NEXT:    ds_write_b32 v0, v1
 ; GCN-O0-NEXT:    s_endpgm
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -222,24 +217,20 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT:    s_add_u32 s12, s12, s9
 ; GCN-O0-NEXT:    s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
-; GCN-O0-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 0
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 1
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT:    buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    ; implicit-def: $vgpr4 : SGPR spill to VGPR lane
+; GCN-O0-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-O0-NEXT:    v_writelane_b32 v4, s0, 0
+; GCN-O0-NEXT:    v_writelane_b32 v4, s1, 1
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT:    buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b32 s0, 1
-; GCN-O0-NEXT:    v_cmp_gt_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT:    v_cmp_gt_u32_e64 s[2:3], v0, s0
 ; GCN-O0-NEXT:    s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 2
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 3
+; GCN-O0-NEXT:    v_writelane_b32 v4, s0, 2
+; GCN-O0-NEXT:    v_writelane_b32 v4, s1, 3
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
@@ -247,12 +238,12 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:  ; %bb.1: ; %bb.outer.then
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s4, v0, 0
-; GCN-O0-NEXT:    v_readlane_b32 s5, v0, 1
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    v_readlane_b32 s4, v4, 0
+; GCN-O0-NEXT:    v_readlane_b32 s5, v4, 1
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s2, 0xf000
 ; GCN-O0-NEXT:    s_mov_b32 s0, 0
 ; GCN-O0-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
@@ -260,19 +251,19 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:    ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
 ; GCN-O0-NEXT:    s_mov_b64 s[6:7], s[0:1]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_ashrrev_i32_e64 v4, 31, v1
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT:    v_mov_b32_e32 v3, v4
+; GCN-O0-NEXT:    v_ashrrev_i32_e64 v3, 31, v0
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT:    v_mov_b32_e32 v2, v3
 ; GCN-O0-NEXT:    s_mov_b32 s0, 2
-; GCN-O0-NEXT:    v_lshl_b64 v[3:4], v[2:3], s0
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
-; GCN-O0-NEXT:    v_cmp_ne_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT:    v_lshl_b64 v[2:3], v[1:2], s0
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT:    buffer_store_dword v1, v[2:3], s[4:7], 0 addr64
+; GCN-O0-NEXT:    v_cmp_ne_u32_e64 s[2:3], v0, s0
 ; GCN-O0-NEXT:    s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 4
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 5
+; GCN-O0-NEXT:    v_writelane_b32 v4, s0, 4
+; GCN-O0-NEXT:    v_writelane_b32 v4, s1, 5
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
@@ -280,11 +271,11 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:  ; %bb.2: ; %bb.inner.then
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT:    v_readlane_b32 s0, v4, 0
+; GCN-O0-NEXT:    v_readlane_b32 s1, v4, 1
 ; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 1
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -305,24 +296,24 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:  .LBB1_3: ; %Flow
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 2
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 3
+; GCN-O0-NEXT:    v_readlane_b32 s0, v4, 2
+; GCN-O0-NEXT:    v_readlane_b32 s1, v4, 3
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GCN-O0-NEXT:    s_branch .LBB1_5
 ; GCN-O0-NEXT:  .LBB1_4: ; %bb.inner.end
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s2, v0, 4
-; GCN-O0-NEXT:    v_readlane_b32 s3, v0, 5
+; GCN-O0-NEXT:    v_readlane_b32 s2, v4, 4
+; GCN-O0-NEXT:    v_readlane_b32 s3, v4, 5
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT:    v_readlane_b32 s0, v4, 0
+; GCN-O0-NEXT:    v_readlane_b32 s1, v4, 1
 ; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 2
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -340,14 +331,10 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GCN-O0-NEXT:    s_branch .LBB1_3
 ; GCN-O0-NEXT:  .LBB1_5: ; %bb.outer.end
-; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 3
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 3
+; GCN-O0-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-O0-NEXT:    s_mov_b32 m0, -1
-; GCN-O0-NEXT:    ds_write_b32 v1, v2
-; GCN-O0-NEXT:    ; kill: killed $vgpr0
+; GCN-O0-NEXT:    ds_write_b32 v0, v1
 ; GCN-O0-NEXT:    s_endpgm
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -433,19 +420,14 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT:    s_add_u32 s12, s12, s9
 ; GCN-O0-NEXT:    s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
 ; GCN-O0-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-O0-NEXT:    s_mov_b64 s[2:3], s[0:1]
-; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_writelane_b32 v0, s2, 0
-; GCN-O0-NEXT:    v_writelane_b32 v0, s3, 1
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT:    buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    ; implicit-def: $vgpr4 : SGPR spill to VGPR lane
+; GCN-O0-NEXT:    v_writelane_b32 v4, s2, 0
+; GCN-O0-NEXT:    v_writelane_b32 v4, s3, 1
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT:    buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b32 s2, 0xf000
 ; GCN-O0-NEXT:    s_mov_b32 s4, 0
 ; GCN-O0-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
@@ -453,22 +435,22 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
 ; GCN-O0-NEXT:    s_mov_b64 s[2:3], s[4:5]
 ; GCN-O0-NEXT:    s_mov_b32 s4, 2
-; GCN-O0-NEXT:    v_lshlrev_b32_e64 v3, s4, v1
+; GCN-O0-NEXT:    v_lshlrev_b32_e64 v2, s4, v0
 ; GCN-O0-NEXT:    s_mov_b32 s4, 0
 ; GCN-O0-NEXT:    ; implicit-def: $sgpr4
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GCN-O0-NEXT:    v_mov_b32_e32 v4, v2
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT:    buffer_store_dword v2, v[3:4], s[0:3], 0 addr64
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GCN-O0-NEXT:    v_mov_b32_e32 v3, v1
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT:    buffer_store_dword v1, v[2:3], s[0:3], 0 addr64
 ; GCN-O0-NEXT:    s_mov_b32 s0, 1
-; GCN-O0-NEXT:    v_cmp_gt_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT:    v_cmp_gt_u32_e64 s[2:3], v0, s0
 ; GCN-O0-NEXT:    s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 2
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 3
+; GCN-O0-NEXT:    v_writelane_b32 v4, s0, 2
+; GCN-O0-NEXT:    v_writelane_b32 v4, s1, 3
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
@@ -476,19 +458,19 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:  ; %bb.1: ; %bb.outer.then
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s0, 2
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_cmp_ne_u32_e64 s[0:1], v1, s0
+; GCN-O0-NEXT:    v_cmp_ne_u32_e64 s[0:1], v0, s0
 ; GCN-O0-NEXT:    s_mov_b64 s[2:3], exec
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
 ; GCN-O0-NEXT:    s_xor_b64 s[2:3], s[0:1], s[2:3]
-; GCN-O0-NEXT:    v_writelane_b32 v0, s2, 4
-; GCN-O0-NEXT:    v_writelane_b32 v0, s3, 5
+; GCN-O0-NEXT:    v_writelane_b32 v4, s2, 4
+; GCN-O0-NEXT:    v_writelane_b32 v4, s3, 5
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
 ; GCN-O0-NEXT:    s_cbranch_execz .LBB2_2
@@ -496,28 +478,28 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:  .LBB2_2: ; %Flow
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 4
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 5
+; GCN-O0-NEXT:    v_readlane_b32 s0, v4, 4
+; GCN-O0-NEXT:    v_readlane_b32 s1, v4, 5
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], exec, s[0:1]
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 6
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 7
+; GCN-O0-NEXT:    v_writelane_b32 v4, s0, 6
+; GCN-O0-NEXT:    v_writelane_b32 v4, s1, 7
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_xor_b64 exec, exec, s[0:1]
 ; GCN-O0-NEXT:    s_cbranch_execz .LBB2_5
 ; GCN-O0-NEXT:  ; %bb.3: ; %bb.then
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT:    v_readlane_b32 s0, v4, 0
+; GCN-O0-NEXT:    v_readlane_b32 s1, v4, 1
 ; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 1
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -538,11 +520,11 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:  .LBB2_4: ; %bb.else
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT:    v_readlane_b32 s0, v4, 0
+; GCN-O0-NEXT:    v_readlane_b32 s1, v4, 1
 ; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 2
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -562,26 +544,25 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:  .LBB2_5: ; %Flow1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 6
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 7
+; GCN-O0-NEXT:    v_readlane_b32 s0, v4, 6
+; GCN-O0-NEXT:    v_readlane_b32 s1, v4, 7
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GCN-O0-NEXT:  .LBB2_6: ; %bb.outer.end
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 2
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 3
+; GCN-O0-NEXT:    v_readlane_b32 s0, v4, 2
+; GCN-O0-NEXT:    v_readlane_b32 s1, v4, 3
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 3
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 3
+; GCN-O0-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-O0-NEXT:    s_mov_b32 m0, -1
-; GCN-O0-NEXT:    ds_write_b32 v1, v2
-; GCN-O0-NEXT:    ; kill: killed $vgpr0
+; GCN-O0-NEXT:    ds_write_b32 v0, v1
 ; GCN-O0-NEXT:    s_endpgm
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -681,51 +662,47 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT:    s_add_u32 s12, s12, s9
 ; GCN-O0-NEXT:    s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x9
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT:    buffer_store_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT:    buffer_store_dword v1, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b32 s0, 2
-; GCN-O0-NEXT:    v_lshlrev_b32_e64 v3, s0, v1
+; GCN-O0-NEXT:    v_lshlrev_b32_e64 v2, s0, v0
 ; GCN-O0-NEXT:    s_mov_b32 s1, 0
 ; GCN-O0-NEXT:    ; implicit-def: $sgpr1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GCN-O0-NEXT:    v_mov_b32_e32 v4, v2
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GCN-O0-NEXT:    v_mov_b32_e32 v3, v1
 ; GCN-O0-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-O0-NEXT:    s_mov_b32 s2, s4
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, v3
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GCN-O0-NEXT:    s_mov_b32 s1, s5
-; GCN-O0-NEXT:    v_mov_b32_e32 v6, v4
-; GCN-O0-NEXT:    v_add_i32_e64 v5, s[2:3], s2, v2
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, s1
-; GCN-O0-NEXT:    v_addc_u32_e64 v2, s[2:3], v2, v6, s[2:3]
-; GCN-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GCN-O0-NEXT:    v_mov_b32_e32 v6, v2
-; GCN-O0-NEXT:    buffer_store_dword v5, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    v_mov_b32_e32 v5, v3
+; GCN-O0-NEXT:    v_add_i32_e64 v4, s[2:3], s2, v1
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, s1
+; GCN-O0-NEXT:    v_addc_u32_e64 v1, s[2:3], v1, v5, s[2:3]
+; GCN-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GCN-O0-NEXT:    v_mov_b32_e32 v5, v1
+; GCN-O0-NEXT:    buffer_store_dword v4, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    buffer_store_dword v6, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v5, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b32 s1, 0xf000
 ; GCN-O0-NEXT:    s_mov_b32 s2, 0
 ; GCN-O0-NEXT:    ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
 ; GCN-O0-NEXT:    s_mov_b32 s3, s1
 ; GCN-O0-NEXT:    ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
 ; GCN-O0-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
-; GCN-O0-NEXT:    v_cmp_lt_u32_e64 s[0:1], v1, s0
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT:    buffer_store_dword v1, v[2:3], s[4:7], 0 addr64
+; GCN-O0-NEXT:    v_cmp_lt_u32_e64 s[0:1], v0, s0
 ; GCN-O0-NEXT:    s_mov_b64 s[2:3], exec
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
 ; GCN-O0-NEXT:    s_xor_b64 s[2:3], s[0:1], s[2:3]
-; GCN-O0-NEXT:    v_writelane_b32 v0, s2, 0
-; GCN-O0-NEXT:    v_writelane_b32 v0, s3, 1
+; GCN-O0-NEXT:    ; implicit-def: $vgpr6 : SGPR spill to VGPR lane
+; GCN-O0-NEXT:    v_writelane_b32 v6, s2, 0
+; GCN-O0-NEXT:    v_writelane_b32 v6, s3, 1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
 ; GCN-O0-NEXT:    s_cbranch_execz .LBB3_1
@@ -733,28 +710,28 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:  .LBB3_1: ; %Flow2
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT:    v_readlane_b32 s0, v6, 0
+; GCN-O0-NEXT:    v_readlane_b32 s1, v6, 1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], exec, s[0:1]
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 2
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 3
+; GCN-O0-NEXT:    v_writelane_b32 v6, s0, 2
+; GCN-O0-NEXT:    v_writelane_b32 v6, s1, 3
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_xor_b64 exec, exec, s[0:1]
 ; GCN-O0-NEXT:    s_cbranch_execz .LBB3_8
 ; GCN-O0-NEXT:  ; %bb.2: ; %bb.outer.then
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s0, 0xf000
 ; GCN-O0-NEXT:    s_mov_b32 s2, 0
 ; GCN-O0-NEXT:    s_mov_b32 s4, s2
@@ -763,16 +740,16 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_mov_b32 s1, s2
 ; GCN-O0-NEXT:    ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
 ; GCN-O0-NEXT:    s_mov_b64 s[2:3], s[4:5]
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 1
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 1
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    buffer_store_dword v2, v[3:4], s[0:3], 0 addr64 offset:4
+; GCN-O0-NEXT:    buffer_store_dword v1, v[2:3], s[0:3], 0 addr64 offset:4
 ; GCN-O0-NEXT:    s_mov_b32 s0, 2
-; GCN-O0-NEXT:    v_cmp_eq_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT:    v_cmp_eq_u32_e64 s[2:3], v0, s0
 ; GCN-O0-NEXT:    s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 4
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 5
+; GCN-O0-NEXT:    v_writelane_b32 v6, s0, 4
+; GCN-O0-NEXT:    v_writelane_b32 v6, s1, 5
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
@@ -789,7 +766,6 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_mov_b32 s1, s2
 ; GCN-O0-NEXT:    ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
 ; GCN-O0-NEXT:    s_mov_b64 s[2:3], s[4:5]
-; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 2
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:8
@@ -797,11 +773,11 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:  .LBB3_4: ; %bb.outer.else
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v4, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s1, 0xf000
 ; GCN-O0-NEXT:    s_mov_b32 s0, 0
 ; GCN-O0-NEXT:    s_mov_b32 s2, s0
@@ -810,15 +786,15 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_mov_b32 s5, s0
 ; GCN-O0-NEXT:    ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
 ; GCN-O0-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 3
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 3
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 offset:12
-; GCN-O0-NEXT:    v_cmp_eq_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT:    buffer_store_dword v1, v[2:3], s[4:7], 0 addr64 offset:12
+; GCN-O0-NEXT:    v_cmp_eq_u32_e64 s[2:3], v0, s0
 ; GCN-O0-NEXT:    s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 6
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 7
+; GCN-O0-NEXT:    v_writelane_b32 v6, s0, 6
+; GCN-O0-NEXT:    v_writelane_b32 v6, s1, 7
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
@@ -835,43 +811,41 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_mov_b32 s1, s2
 ; GCN-O0-NEXT:    ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
 ; GCN-O0-NEXT:    s_mov_b64 s[2:3], s[4:5]
-; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, 4
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:16
 ; GCN-O0-NEXT:  .LBB3_6: ; %Flow
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 6
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 7
+; GCN-O0-NEXT:    v_readlane_b32 s0, v6, 6
+; GCN-O0-NEXT:    v_readlane_b32 s1, v6, 7
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GCN-O0-NEXT:    s_branch .LBB3_1
 ; GCN-O0-NEXT:  .LBB3_7: ; %Flow1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 4
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 5
+; GCN-O0-NEXT:    v_readlane_b32 s0, v6, 4
+; GCN-O0-NEXT:    v_readlane_b32 s1, v6, 5
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GCN-O0-NEXT:  .LBB3_8: ; %bb.outer.end
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 2
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 3
+; GCN-O0-NEXT:    v_readlane_b32 s0, v6, 2
+; GCN-O0-NEXT:    v_readlane_b32 s1, v6, 3
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, 3
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, 3
+; GCN-O0-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-O0-NEXT:    s_mov_b32 m0, -1
-; GCN-O0-NEXT:    ds_write_b32 v1, v2
-; GCN-O0-NEXT:    ; kill: killed $vgpr0
+; GCN-O0-NEXT:    ds_write_b32 v0, v1
 ; GCN-O0-NEXT:    s_endpgm
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -935,36 +909,32 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:    s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT:    s_add_u32 s12, s12, s9
 ; GCN-O0-NEXT:    s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
-; GCN-O0-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 0
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 1
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT:    buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
+; GCN-O0-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-O0-NEXT:    v_writelane_b32 v3, s0, 0
+; GCN-O0-NEXT:    v_writelane_b32 v3, s1, 1
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT:    buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b32 s0, 1
-; GCN-O0-NEXT:    v_cmp_gt_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT:    v_cmp_gt_u32_e64 s[2:3], v0, s0
 ; GCN-O0-NEXT:    s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT:    v_writelane_b32 v0, s0, 2
-; GCN-O0-NEXT:    v_writelane_b32 v0, s1, 3
+; GCN-O0-NEXT:    v_writelane_b32 v3, s0, 2
+; GCN-O0-NEXT:    v_writelane_b32 v3, s1, 3
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v3, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[0:1]
 ; GCN-O0-NEXT:    s_cbranch_execz .LBB4_2
 ; GCN-O0-NEXT:  ; %bb.1: ; %bb.then
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    s_waitcnt expcnt(0)
+; GCN-O0-NEXT:    buffer_load_dword v3, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v1, 0
-; GCN-O0-NEXT:    v_readlane_b32 s1, v1, 1
-; GCN-O0-NEXT:    s_waitcnt expcnt(0)
+; GCN-O0-NEXT:    v_readlane_b32 s0, v3, 0
+; GCN-O0-NEXT:    v_readlane_b32 s1, v3, 1
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s2, 0xf000
 ; GCN-O0-NEXT:    s_mov_b32 s4, 0
@@ -983,14 +953,13 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT:  .LBB4_2: ; %bb.end
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v3, off, s[12:15], 0 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s0, v0, 2
-; GCN-O0-NEXT:    v_readlane_b32 s1, v0, 3
+; GCN-O0-NEXT:    v_readlane_b32 s0, v3, 2
+; GCN-O0-NEXT:    v_readlane_b32 s1, v3, 3
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GCN-O0-NEXT:    s_barrier
-; GCN-O0-NEXT:    ; kill: killed $vgpr0
 ; GCN-O0-NEXT:    s_endpgm
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -1082,53 +1051,47 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0:       ; %bb.0: ; %bb
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GCN-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    s_waitcnt expcnt(1)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
-; GCN-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 s[4:5], 0
 ; GCN-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GCN-O0-NEXT:    s_waitcnt vmcnt(1)
-; GCN-O0-NEXT:    v_writelane_b32 v0, s6, 0
-; GCN-O0-NEXT:    v_writelane_b32 v0, s7, 1
-; GCN-O0-NEXT:    v_writelane_b32 v0, s4, 2
-; GCN-O0-NEXT:    v_writelane_b32 v0, s5, 3
+; GCN-O0-NEXT:    ; implicit-def: $vgpr6 : SGPR spill to VGPR lane
+; GCN-O0-NEXT:    s_waitcnt expcnt(1)
+; GCN-O0-NEXT:    v_writelane_b32 v6, s6, 0
+; GCN-O0-NEXT:    v_writelane_b32 v6, s7, 1
+; GCN-O0-NEXT:    v_writelane_b32 v6, s4, 2
+; GCN-O0-NEXT:    v_writelane_b32 v6, s5, 3
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:  .LBB5_1: ; %bb1
 ; GCN-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s8, v0, 2
-; GCN-O0-NEXT:    v_readlane_b32 s9, v0, 3
-; GCN-O0-NEXT:    v_readlane_b32 s6, v0, 0
-; GCN-O0-NEXT:    v_readlane_b32 s7, v0, 1
-; GCN-O0-NEXT:    v_writelane_b32 v0, s6, 4
-; GCN-O0-NEXT:    v_writelane_b32 v0, s7, 5
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    v_readlane_b32 s8, v6, 2
+; GCN-O0-NEXT:    v_readlane_b32 s9, v6, 3
+; GCN-O0-NEXT:    v_readlane_b32 s6, v6, 0
+; GCN-O0-NEXT:    v_readlane_b32 s7, v6, 1
+; GCN-O0-NEXT:    v_writelane_b32 v6, s6, 4
+; GCN-O0-NEXT:    v_writelane_b32 v6, s7, 5
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s4, 0x207
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, s4
+; GCN-O0-NEXT:    v_cmp_lt_i32_e64 s[4:5], v0, s4
 ; GCN-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
-; GCN-O0-NEXT:    v_writelane_b32 v0, s4, 6
-; GCN-O0-NEXT:    v_writelane_b32 v0, s5, 7
-; GCN-O0-NEXT:    v_writelane_b32 v0, s6, 0
-; GCN-O0-NEXT:    v_writelane_b32 v0, s7, 1
+; GCN-O0-NEXT:    v_writelane_b32 v6, s4, 6
+; GCN-O0-NEXT:    v_writelane_b32 v6, s5, 7
+; GCN-O0-NEXT:    v_writelane_b32 v6, s6, 0
+; GCN-O0-NEXT:    v_writelane_b32 v6, s7, 1
 ; GCN-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GCN-O0-NEXT:    v_writelane_b32 v0, s6, 2
-; GCN-O0-NEXT:    v_writelane_b32 v0, s7, 3
+; GCN-O0-NEXT:    v_writelane_b32 v6, s6, 2
+; GCN-O0-NEXT:    v_writelane_b32 v6, s7, 3
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_andn2_b64 exec, exec, s[4:5]
 ; GCN-O0-NEXT:    s_cbranch_execnz .LBB5_1
@@ -1136,38 +1099,38 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    ; in Loop: Header=BB5_1 Depth=1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s4, v0, 6
-; GCN-O0-NEXT:    v_readlane_b32 s5, v0, 7
+; GCN-O0-NEXT:    v_readlane_b32 s4, v6, 6
+; GCN-O0-NEXT:    v_readlane_b32 s5, v6, 7
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b32 s6, 0
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_cmp_ne_u32_e64 s[4:5], v1, s6
-; GCN-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v1, s6
-; GCN-O0-NEXT:    v_writelane_b32 v0, s4, 8
-; GCN-O0-NEXT:    v_writelane_b32 v0, s5, 9
+; GCN-O0-NEXT:    v_cmp_ne_u32_e64 s[4:5], v0, s6
+; GCN-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v0, s6
+; GCN-O0-NEXT:    v_writelane_b32 v6, s4, 8
+; GCN-O0-NEXT:    v_writelane_b32 v6, s5, 9
 ; GCN-O0-NEXT:    s_mov_b32 s4, 0
 ; GCN-O0-NEXT:    s_mov_b32 s8, s4
 ; GCN-O0-NEXT:    s_mov_b32 s9, s4
 ; GCN-O0-NEXT:    s_mov_b32 s10, s4
 ; GCN-O0-NEXT:    s_mov_b32 s11, s4
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, s8
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, s9
-; GCN-O0-NEXT:    v_mov_b32_e32 v3, s10
-; GCN-O0-NEXT:    v_mov_b32_e32 v4, s11
-; GCN-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    v_mov_b32_e32 v0, s8
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, s9
+; GCN-O0-NEXT:    v_mov_b32_e32 v2, s10
+; GCN-O0-NEXT:    v_mov_b32_e32 v3, s11
+; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GCN-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GCN-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-O0-NEXT:    v_writelane_b32 v0, s4, 10
-; GCN-O0-NEXT:    v_writelane_b32 v0, s5, 11
+; GCN-O0-NEXT:    v_writelane_b32 v6, s4, 10
+; GCN-O0-NEXT:    v_writelane_b32 v6, s5, 11
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[4:5]
@@ -1176,32 +1139,32 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    ; in Loop: Header=BB5_1 Depth=1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    ; implicit-def: $sgpr4
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, s4
-; GCN-O0-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; GCN-O0-NEXT:    v_mov_b32_e32 v0, s4
+; GCN-O0-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
 ; GCN-O0-NEXT:    s_mov_b32 s4, 0
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_cmp_lt_f32_e64 s[6:7], v1, s4
+; GCN-O0-NEXT:    v_cmp_lt_f32_e64 s[6:7], v0, s4
 ; GCN-O0-NEXT:    s_mov_b32 s8, s4
 ; GCN-O0-NEXT:    s_mov_b32 s9, s4
 ; GCN-O0-NEXT:    s_mov_b32 s10, s4
 ; GCN-O0-NEXT:    s_mov_b32 s11, s4
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, s8
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, s9
-; GCN-O0-NEXT:    v_mov_b32_e32 v3, s10
-; GCN-O0-NEXT:    v_mov_b32_e32 v4, s11
-; GCN-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    v_mov_b32_e32 v0, s8
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, s9
+; GCN-O0-NEXT:    v_mov_b32_e32 v2, s10
+; GCN-O0-NEXT:    v_mov_b32_e32 v3, s11
+; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GCN-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GCN-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-O0-NEXT:    v_writelane_b32 v0, s4, 12
-; GCN-O0-NEXT:    v_writelane_b32 v0, s5, 13
+; GCN-O0-NEXT:    v_writelane_b32 v6, s4, 12
+; GCN-O0-NEXT:    v_writelane_b32 v6, s5, 13
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[4:5]
@@ -1219,7 +1182,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    s_mov_b32 s5, s10
 ; GCN-O0-NEXT:    s_mov_b32 s6, s9
 ; GCN-O0-NEXT:    s_mov_b32 s7, s8
-; GCN-O0-NEXT:    s_waitcnt expcnt(0)
+; GCN-O0-NEXT:    s_waitcnt expcnt(1)
 ; GCN-O0-NEXT:    v_mov_b32_e32 v0, s4
 ; GCN-O0-NEXT:    v_mov_b32_e32 v1, s5
 ; GCN-O0-NEXT:    v_mov_b32_e32 v2, s6
@@ -1233,14 +1196,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:  .LBB5_5: ; %Flow2
 ; GCN-O0-NEXT:    ; in Loop: Header=BB5_1 Depth=1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    s_waitcnt expcnt(1)
-; GCN-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    s_waitcnt expcnt(0)
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s4, v4, 10
-; GCN-O0-NEXT:    v_readlane_b32 s5, v4, 11
+; GCN-O0-NEXT:    v_readlane_b32 s4, v6, 10
+; GCN-O0-NEXT:    v_readlane_b32 s5, v6, 11
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
@@ -1255,14 +1217,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:  .LBB5_6: ; %Flow
 ; GCN-O0-NEXT:    ; in Loop: Header=BB5_1 Depth=1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    s_waitcnt expcnt(1)
-; GCN-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    s_waitcnt expcnt(0)
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s4, v4, 12
-; GCN-O0-NEXT:    v_readlane_b32 s5, v4, 13
+; GCN-O0-NEXT:    v_readlane_b32 s4, v6, 12
+; GCN-O0-NEXT:    v_readlane_b32 s5, v6, 13
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
@@ -1277,20 +1238,19 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:  .LBB5_7: ; %bb10
 ; GCN-O0-NEXT:    ; in Loop: Header=BB5_1 Depth=1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    s_waitcnt expcnt(3)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s6, v0, 8
-; GCN-O0-NEXT:    v_readlane_b32 s7, v0, 9
+; GCN-O0-NEXT:    v_readlane_b32 s6, v6, 8
+; GCN-O0-NEXT:    v_readlane_b32 s7, v6, 9
 ; GCN-O0-NEXT:    s_mov_b64 s[4:5], -1
-; GCN-O0-NEXT:    v_writelane_b32 v0, s4, 14
-; GCN-O0-NEXT:    v_writelane_b32 v0, s5, 15
+; GCN-O0-NEXT:    v_writelane_b32 v6, s4, 14
+; GCN-O0-NEXT:    v_writelane_b32 v6, s5, 15
 ; GCN-O0-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-O0-NEXT:    v_writelane_b32 v0, s4, 16
-; GCN-O0-NEXT:    v_writelane_b32 v0, s5, 17
+; GCN-O0-NEXT:    v_writelane_b32 v6, s4, 16
+; GCN-O0-NEXT:    v_writelane_b32 v6, s5, 17
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[4:5]
@@ -1299,30 +1259,30 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    ; in Loop: Header=BB5_1 Depth=1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_mov_b64 s[4:5], 0
 ; GCN-O0-NEXT:    s_xor_b64 s[4:5], exec, -1
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_writelane_b32 v0, s4, 14
-; GCN-O0-NEXT:    v_writelane_b32 v0, s5, 15
+; GCN-O0-NEXT:    v_writelane_b32 v6, s4, 14
+; GCN-O0-NEXT:    v_writelane_b32 v6, s5, 15
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:  .LBB5_9: ; %Flow3
 ; GCN-O0-NEXT:    ; in Loop: Header=BB5_1 Depth=1
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    s_waitcnt expcnt(0)
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s8, v4, 16
-; GCN-O0-NEXT:    v_readlane_b32 s9, v4, 17
+; GCN-O0-NEXT:    v_readlane_b32 s8, v6, 16
+; GCN-O0-NEXT:    v_readlane_b32 s9, v6, 17
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[8:9]
-; GCN-O0-NEXT:    v_readlane_b32 s6, v4, 4
-; GCN-O0-NEXT:    v_readlane_b32 s7, v4, 5
-; GCN-O0-NEXT:    v_readlane_b32 s4, v4, 14
-; GCN-O0-NEXT:    v_readlane_b32 s5, v4, 15
-; GCN-O0-NEXT:    s_waitcnt expcnt(0)
+; GCN-O0-NEXT:    v_readlane_b32 s6, v6, 4
+; GCN-O0-NEXT:    v_readlane_b32 s7, v6, 5
+; GCN-O0-NEXT:    v_readlane_b32 s4, v6, 14
+; GCN-O0-NEXT:    v_readlane_b32 s5, v6, 15
 ; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
@@ -1331,15 +1291,15 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-O0-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-O0-NEXT:    s_mov_b64 s[8:9], s[4:5]
-; GCN-O0-NEXT:    v_writelane_b32 v4, s8, 0
-; GCN-O0-NEXT:    v_writelane_b32 v4, s9, 1
-; GCN-O0-NEXT:    v_writelane_b32 v4, s6, 2
-; GCN-O0-NEXT:    v_writelane_b32 v4, s7, 3
+; GCN-O0-NEXT:    v_writelane_b32 v6, s8, 0
+; GCN-O0-NEXT:    v_writelane_b32 v6, s9, 1
+; GCN-O0-NEXT:    v_writelane_b32 v6, s6, 2
+; GCN-O0-NEXT:    v_writelane_b32 v6, s7, 3
 ; GCN-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GCN-O0-NEXT:    v_writelane_b32 v4, s6, 18
-; GCN-O0-NEXT:    v_writelane_b32 v4, s7, 19
+; GCN-O0-NEXT:    v_writelane_b32 v6, s6, 18
+; GCN-O0-NEXT:    v_writelane_b32 v6, s7, 19
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(1)
 ; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
@@ -1351,47 +1311,42 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT:    s_cbranch_execnz .LBB5_1
 ; GCN-O0-NEXT:  ; %bb.10: ; %bb12
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    s_waitcnt expcnt(3)
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    s_waitcnt expcnt(4)
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s4, v0, 18
-; GCN-O0-NEXT:    v_readlane_b32 s5, v0, 19
+; GCN-O0-NEXT:    v_readlane_b32 s4, v6, 18
+; GCN-O0-NEXT:    v_readlane_b32 s5, v6, 19
 ; GCN-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-O0-NEXT:  ; %bb.11: ; %bb12
-; GCN-O0-NEXT:    s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    s_mov_b64 exec, s[14:15]
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
-; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_mov_b32_e32 v5, v4
+; GCN-O0-NEXT:    v_mov_b32_e32 v4, v3
 ; GCN-O0-NEXT:    ; implicit-def: $sgpr4
-; GCN-O0-NEXT:    v_mov_b32_e32 v6, s4
-; GCN-O0-NEXT:    buffer_store_dword v5, v6, s[0:3], 0 offen
+; GCN-O0-NEXT:    v_mov_b32_e32 v5, s4
+; GCN-O0-NEXT:    buffer_store_dword v4, v5, s[0:3], 0 offen
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
-; GCN-O0-NEXT:    v_mov_b32_e32 v5, v3
+; GCN-O0-NEXT:    v_mov_b32_e32 v4, v2
 ; GCN-O0-NEXT:    ; implicit-def: $sgpr4
-; GCN-O0-NEXT:    v_mov_b32_e32 v6, s4
-; GCN-O0-NEXT:    buffer_store_dword v5, v6, s[0:3], 0 offen
+; GCN-O0-NEXT:    v_mov_b32_e32 v5, s4
+; GCN-O0-NEXT:    buffer_store_dword v4, v5, s[0:3], 0 offen
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
-; GCN-O0-NEXT:    v_mov_b32_e32 v5, v2
+; GCN-O0-NEXT:    v_mov_b32_e32 v4, v1
 ; GCN-O0-NEXT:    ; implicit-def: $sgpr4
-; GCN-O0-NEXT:    v_mov_b32_e32 v6, s4
-; GCN-O0-NEXT:    buffer_store_dword v5, v6, s[0:3], 0 offen
+; GCN-O0-NEXT:    v_mov_b32_e32 v5, s4
+; GCN-O0-NEXT:    buffer_store_dword v4, v5, s[0:3], 0 offen
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
+; GCN-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
 ; GCN-O0-NEXT:    ; implicit-def: $sgpr4
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, s4
-; GCN-O0-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, s4
+; GCN-O0-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    ; kill: killed $vgpr0
 ; GCN-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GCN-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; GCN-O0-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index b2f9bf89d9ec6..508b2b73c0b43 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -218,338 +218,332 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0:       ; %bb.0: ; %_udiv-special-cases
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
 ; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    s_nop 0
 ; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v0
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v3
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v2
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(4)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(3)
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
+; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 63
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v4
-; GFX9-O0-NEXT:    v_ashrrev_i64 v[13:14], s4, v[11:12]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v3
+; GFX9-O0-NEXT:    v_ashrrev_i64 v[12:13], s4, v[10:11]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v2
-; GFX9-O0-NEXT:    v_ashrrev_i64 v[11:12], s4, v[11:12]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v14
-; GFX9-O0-NEXT:    v_xor_b32_e64 v1, v6, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v13
-; GFX9-O0-NEXT:    v_xor_b32_e64 v13, v4, v5
-; GFX9-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v10
-; GFX9-O0-NEXT:    v_xor_b32_e64 v1, v6, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT:    v_xor_b32_e64 v15, v4, v5
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v15
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v16
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v14
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v9, vcc, v9, v4
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v1
+; GFX9-O0-NEXT:    v_ashrrev_i64 v[10:11], s4, v[10:11]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
+; GFX9-O0-NEXT:    v_xor_b32_e64 v0, v5, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v12
+; GFX9-O0-NEXT:    v_xor_b32_e64 v12, v3, v4
+; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v9
+; GFX9-O0-NEXT:    v_xor_b32_e64 v0, v5, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
+; GFX9-O0-NEXT:    v_xor_b32_e64 v14, v3, v4
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v14
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v13
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v8, vcc, v8, v3
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v5, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v12, vcc, v9, v3, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v5, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
-; GFX9-O0-NEXT:    v_xor_b32_e64 v1, v5, v1
-; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_xor_b32_e64 v11, v3, v2
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v8
-; GFX9-O0-NEXT:    v_xor_b32_e64 v1, v5, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v7
-; GFX9-O0-NEXT:    v_xor_b32_e64 v7, v3, v2
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v7
-; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v1, vcc, v1, v3
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v8, v5, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v11, vcc, v7, v3, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v2, v5, vcc
+; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-O0-NEXT:    v_xor_b32_e64 v0, v4, v0
+; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
+; GFX9-O0-NEXT:    v_xor_b32_e64 v10, v2, v1
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v7
+; GFX9-O0-NEXT:    v_xor_b32_e64 v0, v4, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
+; GFX9-O0-NEXT:    v_xor_b32_e64 v6, v2, v1
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v6
+; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v11
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v2
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v4, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v6, v2, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v6, vcc, v1, v4, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
-; GFX9-O0-NEXT:    v_xor_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v4
-; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v6
+; GFX9-O0-NEXT:    v_xor_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT:    v_xor_b32_e64 v2, v2, v3
+; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v3
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-O0-NEXT:    v_or_b32_e64 v3, v8, v7
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT:    v_or_b32_e64 v1, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
+; GFX9-O0-NEXT:    v_or_b32_e64 v2, v7, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT:    v_or_b32_e64 v0, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s6, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s7, 1
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[1:2], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT:    v_or_b32_e64 v15, v4, v2
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 0
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 1
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v13
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v9, v3, v1
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v15
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], s[6:7]
+; GFX9-O0-NEXT:    v_or_b32_e64 v14, v3, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v8, v2, v0
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s9, 32
-; GFX9-O0-NEXT:    v_add_u32_e64 v6, v6, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
-; GFX9-O0-NEXT:    v_min_u32_e64 v6, v6, v7
+; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT:    v_min_u32_e64 v5, v5, v6
 ; GFX9-O0-NEXT:    s_mov_b32 s8, 0
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v7
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v5
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v8, v8
-; GFX9-O0-NEXT:    v_min_u32_e64 v15, v5, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v4
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
+; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
 ; GFX9-O0-NEXT:    s_mov_b64 s[10:11], 64
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
 ; GFX9-O0-NEXT:    s_mov_b32 s12, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s11
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v8, s[12:13], v8, s12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s14
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v5, s[12:13], v5, v9, s[12:13]
-; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v7, s[12:13], v7, s12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s14
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13]
+; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v10, s[12:13]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, v6, v7, s[12:13]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[12:13]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v5, v6, s[12:13]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v0
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
 ; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v1
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v2
-; GFX9-O0-NEXT:    v_min_u32_e64 v6, v5, v6
+; GFX9-O0-NEXT:    v_min_u32_e64 v5, v4, v5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v3
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v11, v4
-; GFX9-O0-NEXT:    v_min_u32_e64 v15, v5, v11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v2
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v10, v3
+; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v10
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v14
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
 ; GFX9-O0-NEXT:    s_mov_b32 s10, s11
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v11, s[8:9], v11, s8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s10
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v5, s[8:9], v5, v12, s[8:9]
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v10, s[8:9], v10, s8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s10
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9]
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v11
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[8:9]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
 ; GFX9-O0-NEXT:    s_mov_b32 s10, s6
 ; GFX9-O0-NEXT:    s_mov_b32 s11, s7
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, v5, v8
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v9, vcc, v6, v7, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s10
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v4, vcc, v4, v7
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s10
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v6, v7, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s10
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s11
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v6, v7, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v8
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v6
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
 ; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[12:13]
-; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[14:15]
+; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[14:15]
 ; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v7, v10, s[8:9]
-; GFX9-O0-NEXT:    v_and_b32_e64 v7, 1, v7
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v7, 1
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[8:9]
+; GFX9-O0-NEXT:    v_and_b32_e64 v6, 1, v6
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v6, 1
 ; GFX9-O0-NEXT:    s_or_b64 s[8:9], s[4:5], s[8:9]
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX9-O0-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s13
-; GFX9-O0-NEXT:    v_xor_b32_e64 v7, v7, s14
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_xor_b32_e64 v6, v6, s14
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
-; GFX9-O0-NEXT:    v_xor_b32_e64 v5, v5, s12
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v7, v7, v10
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[5:6], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v2, v5, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s10
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[8:9]
+; GFX9-O0-NEXT:    v_xor_b32_e64 v4, v4, s12
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v1, v4, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v4, v5, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s10
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[8:9]
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v3, v4, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
 ; GFX9-O0-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s4, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s5, 3
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 3
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
@@ -557,11 +551,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB0_8
 ; GFX9-O0-NEXT:  .LBB0_1: ; %Flow
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v0, 4
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v0, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 4
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 5
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:  ; %bb.2: ; %Flow
 ; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
@@ -588,11 +582,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB0_5
 ; GFX9-O0-NEXT:  .LBB0_3: ; %Flow2
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v4, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v4, 3
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 3
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
@@ -648,11 +642,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB0_3
 ; GFX9-O0-NEXT:  .LBB0_5: ; %Flow1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v8, 6
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v8, 7
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 6
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 7
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
@@ -679,85 +673,85 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:  .LBB0_6: ; %udiv-do-while
 ; GFX9-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s6, v16, 8
-; GFX9-O0-NEXT:    v_readlane_b32 s7, v16, 9
+; GFX9-O0-NEXT:    v_readlane_b32 s6, v30, 8
+; GFX9-O0-NEXT:    v_readlane_b32 s7, v30, 9
 ; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 63
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(16)
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[29:30], s4, v[2:3]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v30
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[28:29], s4, v[2:3]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v29
 ; GFX9-O0-NEXT:    s_mov_b32 s5, 1
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[23:24], s5, v[23:24]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v24
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], s5, v[22:23]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
 ; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v29
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
-; GFX9-O0-NEXT:    v_or_b32_e64 v23, v5, v10
-; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[29:30], s5, v[2:3]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v22
+; GFX9-O0-NEXT:    v_or_b32_e64 v22, v5, v10
+; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v4
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[2:3]
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], s4, v[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v30
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v29
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
 ; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v29
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v28
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_or_b32_e64 v4, v3, v4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s5, v[0:1]
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[29:30], s5, v[6:7]
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[6:7]
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v30
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v29
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(10)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v27
 ; GFX9-O0-NEXT:    v_or3_b32 v6, v6, v7, v10
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v29
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v27
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v26
 ; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v26
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v25
 ; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v25
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v24
 ; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v14
@@ -782,22 +776,22 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, 0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v22
-; GFX9-O0-NEXT:    v_and_b32_e64 v23, v7, v23
-; GFX9-O0-NEXT:    v_and_b32_e64 v21, v11, v21
-; GFX9-O0-NEXT:    ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v20
-; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v23
-; GFX9-O0-NEXT:    v_and_b32_e64 v23, v11, v19
-; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v24
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v20
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v19, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v21
+; GFX9-O0-NEXT:    v_and_b32_e64 v22, v7, v22
+; GFX9-O0-NEXT:    v_and_b32_e64 v20, v11, v20
+; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v19
+; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v22
+; GFX9-O0-NEXT:    v_and_b32_e64 v22, v11, v18
+; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v19
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
@@ -813,66 +807,66 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v20, vcc, v11, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v19, vcc, v11, v16
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s4
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v17, vcc, v10, v11, vcc
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s4
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v9
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v20
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v21
-; GFX9-O0-NEXT:    v_or_b32_e64 v19, v19, v22
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v20
-; GFX9-O0-NEXT:    v_or_b32_e64 v17, v17, v18
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v19
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13]
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v19
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v20
+; GFX9-O0-NEXT:    v_or_b32_e64 v18, v18, v21
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v19
+; GFX9-O0-NEXT:    v_or_b32_e64 v16, v16, v17
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v18
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
 ; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v2
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v0
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v15
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v14
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v12
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v2
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v0
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v14
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v13
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v12
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s6, 4
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s7, 5
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 5
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s6, 8
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s7, 9
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 8
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 9
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -902,6 +896,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_cbranch_execnz .LBB0_6
 ; GFX9-O0-NEXT:    s_branch .LBB0_1
 ; GFX9-O0-NEXT:  .LBB0_7: ; %udiv-preheader
+; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
@@ -910,52 +907,49 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(9)
+; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[21:22]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[20:21]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
 ; GFX9-O0-NEXT:    s_mov_b32 s6, 64
 ; GFX9-O0-NEXT:    v_sub_u32_e64 v12, s6, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[23:24], v12, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v24
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], v12, v[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v23
 ; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
 ; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
 ; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v4, s6
 ; GFX9-O0-NEXT:    v_sub_u32_e64 v5, v4, s6
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[23:24], v5, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[22:23], v5, v[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b32 s6, 0
 ; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v4, s6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v21
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[6:7]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v22
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v20
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[19:20]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[18:19]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v5
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s7
@@ -974,12 +968,12 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s4
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v17, vcc, v15, v17, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s4
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s5
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s4
@@ -991,7 +985,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v16
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -1004,10 +998,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, s7
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s4, 8
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s5, 9
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 8
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 9
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -1036,199 +1030,191 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB0_6
 ; GFX9-O0-NEXT:  .LBB0_8: ; %udiv-bb1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s6
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s7
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s6
 ; GFX9-O0-NEXT:    s_mov_b32 s9, s7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v9, vcc, v4, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s4
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s9
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v8, vcc, v3, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s4
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 0x7f
-; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s4, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[5:6], v3, v[11:12]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v6
+; GFX9-O0-NEXT:    v_sub_u32_e64 v2, s4, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[10:11]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 64
-; GFX9-O0-NEXT:    v_sub_u32_e64 v14, s4, v3
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[14:15], v14, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v15
-; GFX9-O0-NEXT:    v_or_b32_e64 v13, v13, v16
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v14
-; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v6
-; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v3, s4
+; GFX9-O0-NEXT:    v_sub_u32_e64 v13, s4, v2
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[13:14], v13, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
+; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v15
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
+; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v5
+; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v2, s4
 ; GFX9-O0-NEXT:    s_mov_b32 s10, 63
-; GFX9-O0-NEXT:    v_sub_u32_e64 v4, s10, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[13:14], v4, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v15, s[4:5]
+; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s10, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[12:13], v3, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b32 s10, 0
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v3, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v15, s[10:11]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[10:11]
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v2, s10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[10:11]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[10:11]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[7:8], v3, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v4, v7, s[4:5]
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[6:7], v2, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s9
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v3, v6, s[4:5]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v3
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
-; GFX9-O0-NEXT:    v_or_b32_e64 v3, v3, v4
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v1, v1, v2
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7]
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
+; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s7
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s7
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
 ; GFX9-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s6, 6
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s7, 7
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 6
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 7
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-O0-NEXT:    s_cbranch_execz .LBB0_5
 ; GFX9-O0-NEXT:    s_branch .LBB0_7
 ; GFX9-O0-NEXT:  .LBB0_9: ; %udiv-end
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v7
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
-; GFX9-O0-NEXT:    v_xor_b32_e64 v9, v6, v5
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT:    v_xor_b32_e64 v8, v5, v4
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT:    v_xor_b32_e64 v0, v0, v8
+; GFX9-O0-NEXT:    v_xor_b32_e64 v0, v0, v7
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v10
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v7, vcc, v7, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v9
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, v5, v7
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v5, vcc, v3, v5, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v1
+; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 32
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[7:8], s4, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v7
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[5:6], s4, v[5:6]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
-; GFX9-O0-NEXT:    ; kill: killed $vgpr4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[3:4], s4, v[3:4]
+; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_nop 0
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -1441,160 +1427,179 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0:       ; %bb.0: ; %_udiv-special-cases
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-G-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-G-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v3
-; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v3
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v8
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v4
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v7
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14_vgpr15_vgpr16 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v1
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v6
+; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v7
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1_vgpr2 killed $vgpr4_vgpr5 killed $exec
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr3_vgpr4 killed $exec
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v7
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr1 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v2
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 31
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s6
-; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v12, v3, v8
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr1 killed $exec
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 killed $vgpr1_vgpr2 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s6
+; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v11, v2, v7
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 31
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v10, v1, v2
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1_vgpr2 killed $vgpr13_vgpr14 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v9, v0, v1
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr12_vgpr13 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v14
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v15
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v16
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr1 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v2
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 31
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s6
-; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v11, v3, v8
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr1 killed $exec
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 killed $vgpr1_vgpr2 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s6
+; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v10, v2, v7
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 31
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v9, v1, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v8, v0, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v3
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v1, v12, v1
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v4, v12, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v6
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v3, v10, v3
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v2, v10, v2
-; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v1, s[6:7], v1, v12
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v7, s[6:7], v4, v12, s[6:7]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v6, s[6:7], v3, v10, s[6:7]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v5, s[6:7], v2, v10, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v0, v11, v0
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v3, v11, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v5
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v2, v9, v2
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v1, v9, v1
+; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v0, s[6:7], v0, v11
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v6, s[6:7], v3, v11, s[6:7]
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v5, s[6:7], v2, v9, s[6:7]
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v4, s[6:7], v1, v9, s[6:7]
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v4
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v12
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v14
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v14
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v15
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v16
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v5, v11, v5
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v8, v11, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v14
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v7, v9, v7
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v6, v9, v6
-; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v5, s[6:7], v5, v11
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v15, s[6:7], v8, v11, s[6:7]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v14, s[6:7], v7, v9, s[6:7]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v13, s[6:7], v6, v9, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v15
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v14
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v13
-; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v4, v10, v4
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v7, v10, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v13
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v6, v8, v6
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v5, v8, v5
+; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v4, s[6:7], v4, v10
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v14, s[6:7], v7, v10, s[6:7]
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v13, s[6:7], v6, v8, s[6:7]
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v12, s[6:7], v5, v8, s[6:7]
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v14
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v13, v11, v12
-; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v11, v11, v12
-; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v11, v9, v10
-; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v9, v9, v10
-; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v6
+; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v12, v10, v11
+; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v10, v10, v11
+; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v10, v8, v9
+; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v8, v8, v9
+; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v13
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v8, v8, v11
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v9, v10
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v14
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v9, v12
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v11, v10, v11
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[6:7], v[9:10], v[11:12]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[6:7], v[8:9], v[10:11]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v3
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v13
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v8, v8, v11
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v9, v10
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v14
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v9, v12
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v11, v10, v11
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], v[10:11]
 ; GFX9-G-O0-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s5
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], v[4:5]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v10
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v4, v4
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v5, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 32
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v5, v5, v6
+; GFX9-G-O0-NEXT:    v_min_u32_e64 v4, v4, v5
+; GFX9-G-O0-NEXT:    s_mov_b32 s10, 64
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s10
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v5, v4, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v8
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v4, v4
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 32
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v6, v7
+; GFX9-G-O0-NEXT:    v_min_u32_e64 v4, v4, v6
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[8:9]
+; GFX9-G-O0-NEXT:    s_mov_b32 s16, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v3
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v2
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s4
 ; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], v[5:6]
@@ -1605,7 +1610,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 32
 ; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v6, v7
 ; GFX9-G-O0-NEXT:    v_min_u32_e64 v5, v5, v6
-; GFX9-G-O0-NEXT:    s_mov_b32 s10, 64
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s10
 ; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v5, v6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
@@ -1615,130 +1619,106 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, 32
 ; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v7, v8
 ; GFX9-G-O0-NEXT:    v_min_u32_e64 v5, v5, v7
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
-; GFX9-G-O0-NEXT:    s_mov_b32 s16, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v4
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], v[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v12
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v7, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, 32
-; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v7, v8
-; GFX9-G-O0-NEXT:    v_min_u32_e64 v6, v6, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s10
-; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v6, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v10
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v8, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, 32
-; GFX9-G-O0-NEXT:    v_add_u32_e64 v8, v8, v9
-; GFX9-G-O0-NEXT:    v_min_u32_e64 v6, v6, v8
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[8:9]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
 ; GFX9-G-O0-NEXT:    s_mov_b32 s15, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s11, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s14, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, 0
-; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v6, s[8:9], v5, v6
-; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s16
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s16
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v7, s[8:9], v5, v7, s[8:9]
-; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s15
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s14
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v9, s[8:9], v5, v8, s[8:9]
-; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s10
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v8, s[8:9], v5, v8, s[8:9]
-; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v5, s[8:9], v4, v5
+; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s16
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s16
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v6, s[8:9], v4, v6, s[8:9]
+; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s15
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s14
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v8, s[8:9], v4, v7, s[8:9]
+; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s11
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s10
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v7, s[8:9], v4, v7, s[8:9]
+; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s4
-; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[10:11], v[12:13], v[14:15]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[12:13], v[14:15]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s12
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s13
-; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[12:13], v[10:11], v[12:13]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s4
+; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[10:11], v[11:12], v[13:14]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[11:12], v[13:14]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s13
+; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[12:13], v[9:10], v[11:12]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, 1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v4, v9, s[12:13]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, 1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v10, v5, v10, s[12:13]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, 1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v10, v5, v10, s[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, 1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v5, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[10:11]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v4, v9, s[8:9]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, 1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v9
 ; GFX9-G-O0-NEXT:    s_mov_b32 s7, 0x7f
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v6, v6, s7
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v7, v7, s6
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v6, v6, v9
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v8, v7, v8
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[6:7], v[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v2
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v5, v5, s7
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v6, v6, s6
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v5, v8
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v7, v6, v7
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[5:6], v[7:8]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v3
-; GFX9-G-O0-NEXT:    v_and_b32_e32 v1, 1, v5
-; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v2
+; GFX9-G-O0-NEXT:    v_and_b32_e32 v0, 1, v4
+; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v7
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[6:7]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-G-O0-NEXT:    v_and_b32_e32 v3, 1, v5
-; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v3
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 1
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[6:7]
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-G-O0-NEXT:    v_and_b32_e32 v2, 1, v4
+; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v2
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v5, v6
-; GFX9-G-O0-NEXT:    v_and_b32_e32 v5, 1, v5
-; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[6:7]
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v2
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, 0
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v5
+; GFX9-G-O0-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v4
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], -1
 ; GFX9-G-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9-G-O0-NEXT:    v_writelane_b32 v0, s4, 0
-; GFX9-G-O0-NEXT:    v_writelane_b32 v0, s5, 1
+; GFX9-G-O0-NEXT:    ; implicit-def: $vgpr34 : SGPR spill to VGPR lane
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s4, 0
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s5, 1
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-G-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
@@ -1746,11 +1726,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_branch .LBB0_8
 ; GFX9-G-O0-NEXT:  .LBB0_1: ; %Flow
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v0, 2
-; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v0, 3
+; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 2
+; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 3
 ; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-G-O0-NEXT:  ; %bb.2: ; %Flow
 ; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
@@ -1775,11 +1755,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_branch .LBB0_5
 ; GFX9-G-O0-NEXT:  .LBB0_3: ; %Flow2
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v4, 0
-; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v4, 1
+; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 0
+; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 1
 ; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
@@ -1849,11 +1829,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_branch .LBB0_3
 ; GFX9-G-O0-NEXT:  .LBB0_5: ; %Flow1
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v8, 4
-; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v8, 5
+; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 4
+; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 5
 ; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
@@ -1878,11 +1858,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:  .LBB0_6: ; %udiv-do-while
 ; GFX9-G-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_readlane_b32 s6, v16, 6
-; GFX9-G-O0-NEXT:    v_readlane_b32 s7, v16, 7
+; GFX9-G-O0-NEXT:    v_readlane_b32 s6, v34, 6
+; GFX9-G-O0-NEXT:    v_readlane_b32 s7, v34, 7
 ; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
@@ -1891,18 +1871,18 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
@@ -1915,7 +1895,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, 1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[21:22], v2, v[0:1]
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[20:21], v2, v[0:1]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[3:4]
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
@@ -1939,8 +1919,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v3, v0, v1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s9, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v21
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v21
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v2, v3
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v0, v1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v12
@@ -1948,7 +1928,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v14
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v15
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[23:24], v0, v[2:3]
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[22:23], v0, v[2:3]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
 ; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v0, v[12:13]
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr2 killed $exec
@@ -1961,20 +1941,20 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v1
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(8)
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v28, v30
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v29, v31
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v30, v32
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v32
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v33
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v34
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v29
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v30
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v23
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v24
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v28
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v29
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v23
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v15
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v13, v1, v13
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v21
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v21
 ; GFX9-G-O0-NEXT:    v_or3_b32 v12, v12, v14, v15
 ; GFX9-G-O0-NEXT:    v_or3_b32 v2, v2, v3, v13
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
@@ -2004,18 +1984,18 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v11
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v24
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, v25
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v24, v26
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v26
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v27
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v28
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v23
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v24
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v23
 ; GFX9-G-O0-NEXT:    v_and_b32_e64 v11, v8, v11
 ; GFX9-G-O0-NEXT:    v_and_b32_e64 v10, v8, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v21
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v21
 ; GFX9-G-O0-NEXT:    v_and_b32_e64 v8, v6, v8
-; GFX9-G-O0-NEXT:    v_and_b32_e64 v6, v6, v21
+; GFX9-G-O0-NEXT:    v_and_b32_e64 v6, v6, v20
 ; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v4, s[8:9], v4, v11
 ; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9]
 ; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9]
@@ -2024,60 +2004,60 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v10
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v9
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v17
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v18
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v19
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v16
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v17
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v18
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v19
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s12, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s11, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, -1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, s8
-; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v17, s[8:9], v11, v17
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, s8
+; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v16, s[8:9], v11, v16
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s12
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v18, s[8:9], v10, v11, s[8:9]
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s11
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v20, s[8:9], v9, v10, s[8:9]
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, s10
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v19, s[8:9], v8, v9, s[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v17
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v18
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v20
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v19
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v17, v17, v20
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v19, v18, v19
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v19
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[17:18], v[19:20]
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v17
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v19
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v18
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v16, v16, v19
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v18, v17, v18
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v18
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19]
 ; GFX9-G-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v0
-; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v3
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v0
+; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v15
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v14
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v12
-; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v15
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v14
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v12
+; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-G-O0-NEXT:    v_writelane_b32 v16, s6, 2
-; GFX9-G-O0-NEXT:    v_writelane_b32 v16, s7, 3
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 2
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 3
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-G-O0-NEXT:    v_writelane_b32 v16, s6, 6
-; GFX9-G-O0-NEXT:    v_writelane_b32 v16, s7, 7
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 6
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 7
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -2103,6 +2083,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_cbranch_execnz .LBB0_6
 ; GFX9-G-O0-NEXT:    s_branch .LBB0_1
 ; GFX9-G-O0-NEXT:  .LBB0_7: ; %udiv-preheader
+; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
@@ -2111,91 +2094,88 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b32 s4, 64
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v4
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s4
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v4, v13, v4
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v4, v12, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s4
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v5, v5, v13
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v5, v5, v12
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
-; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v13, v6
+; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v12, v6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s6
-; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v13, v6
-; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[6:7], v13, v[21:22]
-; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[26:27], v13, v[15:16]
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[24:25], v5, v[21:22]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v26
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v27
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, v24
+; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v12, v6
+; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[6:7], v12, v[20:21]
+; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[25:26], v12, v[14:15]
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[23:24], v5, v[20:21]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v25
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v14, v14, v23
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v13, v5, v13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v26
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v23
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v24
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v13, v13, v22
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v12, v5, v12
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[8:9], 0
-; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[21:22], v4, v[21:22]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v21
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v22
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v14, s[4:5]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[4:5]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v15
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v16
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v14, s[6:7]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v13, v5, v13, s[6:7]
+; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[20:21], v4, v[20:21]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v21
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v13, s[4:5]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[4:5]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v14
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v15
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v13, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v12, v5, v12, s[6:7]
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v6
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[4:5]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v12, v12, v13, s[4:5]
 ; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v6
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v6
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v14
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v17
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v18
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v19
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v16
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v17
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v18
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v19
 ; GFX9-G-O0-NEXT:    s_mov_b32 s4, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s7, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, -1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, s4
-; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v16, s[4:5], v16, v17
-; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, s10
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v15, s[4:5], v15, v16, s[4:5]
-; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, s4
+; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v15, s[4:5], v15, v16
+; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s10
 ; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5]
-; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s6
+; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s7
 ; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5]
-; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s6
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5]
+; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], s[8:9]
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[8:9]
-; GFX9-G-O0-NEXT:    v_writelane_b32 v12, s8, 6
-; GFX9-G-O0-NEXT:    v_writelane_b32 v12, s9, 7
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s8, 6
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s9, 7
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s7
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s6
@@ -2224,163 +2204,154 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_branch .LBB0_6
 ; GFX9-G-O0-NEXT:  .LBB0_8: ; %udiv-bb1
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s9, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s6
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v5, s[6:7], v2, v5
-; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s10
+; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v4, s[6:7], v1, v4
+; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s10
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v4, v6, s[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s9
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v8, s[6:7], v3, v4, s[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s8
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v7, s[6:7], v1, v3, s[6:7]
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v5, s[6:7], v3, v5, s[6:7]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v7, s[6:7], v2, v3, s[6:7]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v0, v2, s[6:7]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v7
-; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v6
+; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0x7f
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v4, s[6:7], v1, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v3, s[6:7], v0, v1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s7, 64
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v10
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v3, v4, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v9, v1, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v2, v3, v0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v8, v0, v3
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[8:9], v4, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v4, v1
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[1:2], v4, v[13:14]
-; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[18:19], v9, v[13:14]
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[16:17], v4, v[11:12]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v18
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v19
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v16
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[8:9], v3, v0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v3, v0
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v3, v[12:13]
+; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[17:18], v8, v[12:13]
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[15:16], v3, v[10:11]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v17
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v10, v15
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v9
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[13:14], v3, v[13:14]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v18
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v15
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v9, v14
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v3, v3, v8
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[12:13], v2, v[12:13]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s[8:9]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[8:9]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v14
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v9, v10, s[8:9]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v12
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v9, v10, s[6:7]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v3
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v8, s[8:9]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[8:9]
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v13
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[8:9]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v11
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[6:7]
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v8
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[8:9], s[4:5]
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[10:11], s[4:5]
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v5, v8
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v7, v6, v7
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s4
-; GFX9-G-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[5:6], v[7:8]
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v7
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v6, v5, v6
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
+; GFX9-G-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7]
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s11
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s11
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX9-G-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
 ; GFX9-G-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT:    v_writelane_b32 v0, s6, 4
-; GFX9-G-O0-NEXT:    v_writelane_b32 v0, s7, 5
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 4
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 5
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-G-O0-NEXT:    s_cbranch_execz .LBB0_5
 ; GFX9-G-O0-NEXT:    s_branch .LBB0_7
 ; GFX9-G-O0-NEXT:  .LBB0_9: ; %udiv-end
-; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v10
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v12
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v0, v0, v8
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v1, v1, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v10
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v2, v2, v6
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v3, v3, v5
-; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v0, s[4:5], v0, v8
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v1, s[4:5], v1, v7, s[4:5]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v2, s[4:5], v2, v6, s[4:5]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v3, s[4:5], v3, v5, s[4:5]
-; GFX9-G-O0-NEXT:    ; kill: killed $vgpr4
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v0, v0, v7
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v1, v1, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v9
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v2, v2, v5
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v3, v3, v4
+; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v0, s[4:5], v0, v7
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v1, s[4:5], v1, v6, s[4:5]
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v2, s[4:5], v2, v5, s[4:5]
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
 ; GFX9-G-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    s_nop 0
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-G-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -2575,252 +2546,245 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0:       ; %bb.0: ; %_udiv-special-cases
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v6
 ; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v0
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v3
+; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v3
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-O0-NEXT:    v_or_b32_e64 v3, v8, v7
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT:    v_or_b32_e64 v1, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
+; GFX9-O0-NEXT:    v_or_b32_e64 v2, v7, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT:    v_or_b32_e64 v0, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s6, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s7, 1
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[1:2], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT:    v_or_b32_e64 v15, v4, v2
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 0
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 1
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v13
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v9, v3, v1
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v15
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], s[6:7]
+; GFX9-O0-NEXT:    v_or_b32_e64 v14, v3, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v8, v2, v0
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s9, 32
-; GFX9-O0-NEXT:    v_add_u32_e64 v6, v6, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
-; GFX9-O0-NEXT:    v_min_u32_e64 v6, v6, v7
+; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT:    v_min_u32_e64 v5, v5, v6
 ; GFX9-O0-NEXT:    s_mov_b32 s8, 0
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v7
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v5
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v8, v8
-; GFX9-O0-NEXT:    v_min_u32_e64 v15, v5, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v4
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
+; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
 ; GFX9-O0-NEXT:    s_mov_b64 s[10:11], 64
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
 ; GFX9-O0-NEXT:    s_mov_b32 s12, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s11
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v8, s[12:13], v8, s12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s14
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v5, s[12:13], v5, v9, s[12:13]
-; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v7, s[12:13], v7, s12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s14
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13]
+; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v10, s[12:13]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, v6, v7, s[12:13]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[12:13]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v5, v6, s[12:13]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v0
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
 ; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v1
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v2
-; GFX9-O0-NEXT:    v_min_u32_e64 v6, v5, v6
+; GFX9-O0-NEXT:    v_min_u32_e64 v5, v4, v5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v3
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v11, v4
-; GFX9-O0-NEXT:    v_min_u32_e64 v15, v5, v11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v2
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v10, v3
+; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v10
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v14
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
 ; GFX9-O0-NEXT:    s_mov_b32 s10, s11
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v11, s[8:9], v11, s8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s10
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v5, s[8:9], v5, v12, s[8:9]
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v10, s[8:9], v10, s8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s10
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9]
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v11
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[8:9]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
 ; GFX9-O0-NEXT:    s_mov_b32 s10, s6
 ; GFX9-O0-NEXT:    s_mov_b32 s11, s7
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, v5, v8
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v9, vcc, v6, v7, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s10
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v4, vcc, v4, v7
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s10
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v6, v7, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s10
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s11
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v6, v7, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v8
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v6
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
 ; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[12:13]
-; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[14:15]
+; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[14:15]
 ; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v7, v10, s[8:9]
-; GFX9-O0-NEXT:    v_and_b32_e64 v7, 1, v7
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v7, 1
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[8:9]
+; GFX9-O0-NEXT:    v_and_b32_e64 v6, 1, v6
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v6, 1
 ; GFX9-O0-NEXT:    s_or_b64 s[8:9], s[4:5], s[8:9]
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX9-O0-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s13
-; GFX9-O0-NEXT:    v_xor_b32_e64 v7, v7, s14
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_xor_b32_e64 v6, v6, s14
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
-; GFX9-O0-NEXT:    v_xor_b32_e64 v5, v5, s12
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v7, v7, v10
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[5:6], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v2, v5, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s10
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[8:9]
+; GFX9-O0-NEXT:    v_xor_b32_e64 v4, v4, s12
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v1, v4, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v4, v5, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s10
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[8:9]
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v3, v4, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
 ; GFX9-O0-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s4, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s5, 3
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 3
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
@@ -2828,11 +2792,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB1_8
 ; GFX9-O0-NEXT:  .LBB1_1: ; %Flow
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v0, 4
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v0, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 4
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 5
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:  ; %bb.2: ; %Flow
 ; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
@@ -2859,11 +2823,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB1_5
 ; GFX9-O0-NEXT:  .LBB1_3: ; %Flow2
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v4, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v4, 3
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 3
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
@@ -2919,11 +2883,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB1_3
 ; GFX9-O0-NEXT:  .LBB1_5: ; %Flow1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v8, 6
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v8, 7
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 6
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 7
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
@@ -2950,85 +2914,85 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:  .LBB1_6: ; %udiv-do-while
 ; GFX9-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s6, v16, 8
-; GFX9-O0-NEXT:    v_readlane_b32 s7, v16, 9
+; GFX9-O0-NEXT:    v_readlane_b32 s6, v30, 8
+; GFX9-O0-NEXT:    v_readlane_b32 s7, v30, 9
 ; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 63
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(16)
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[29:30], s4, v[2:3]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v30
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[28:29], s4, v[2:3]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v29
 ; GFX9-O0-NEXT:    s_mov_b32 s5, 1
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[23:24], s5, v[23:24]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v24
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], s5, v[22:23]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
 ; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v29
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
-; GFX9-O0-NEXT:    v_or_b32_e64 v23, v5, v10
-; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[29:30], s5, v[2:3]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v22
+; GFX9-O0-NEXT:    v_or_b32_e64 v22, v5, v10
+; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v4
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[2:3]
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], s4, v[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v30
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v29
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
 ; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v29
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v28
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_or_b32_e64 v4, v3, v4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s5, v[0:1]
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[29:30], s5, v[6:7]
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[6:7]
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v30
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v29
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(10)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v27
 ; GFX9-O0-NEXT:    v_or3_b32 v6, v6, v7, v10
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v29
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v27
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v26
 ; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v26
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v25
 ; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v25
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v24
 ; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v14
@@ -3053,22 +3017,22 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, 0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v22
-; GFX9-O0-NEXT:    v_and_b32_e64 v23, v7, v23
-; GFX9-O0-NEXT:    v_and_b32_e64 v21, v11, v21
-; GFX9-O0-NEXT:    ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v20
-; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v23
-; GFX9-O0-NEXT:    v_and_b32_e64 v23, v11, v19
-; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v24
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v20
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v19, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v21
+; GFX9-O0-NEXT:    v_and_b32_e64 v22, v7, v22
+; GFX9-O0-NEXT:    v_and_b32_e64 v20, v11, v20
+; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v19
+; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v22
+; GFX9-O0-NEXT:    v_and_b32_e64 v22, v11, v18
+; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v19
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
@@ -3084,66 +3048,66 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v20, vcc, v11, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v19, vcc, v11, v16
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s4
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v17, vcc, v10, v11, vcc
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s4
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v9
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v20
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v21
-; GFX9-O0-NEXT:    v_or_b32_e64 v19, v19, v22
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v20
-; GFX9-O0-NEXT:    v_or_b32_e64 v17, v17, v18
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v19
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13]
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v19
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v20
+; GFX9-O0-NEXT:    v_or_b32_e64 v18, v18, v21
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v19
+; GFX9-O0-NEXT:    v_or_b32_e64 v16, v16, v17
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v18
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
 ; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v2
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v0
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v15
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v14
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v12
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v2
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v0
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v14
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v13
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v12
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s6, 4
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s7, 5
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 5
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s6, 8
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s7, 9
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 8
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 9
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -3173,6 +3137,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_cbranch_execnz .LBB1_6
 ; GFX9-O0-NEXT:    s_branch .LBB1_1
 ; GFX9-O0-NEXT:  .LBB1_7: ; %udiv-preheader
+; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
@@ -3181,52 +3148,49 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(9)
+; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[21:22]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[20:21]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
 ; GFX9-O0-NEXT:    s_mov_b32 s6, 64
 ; GFX9-O0-NEXT:    v_sub_u32_e64 v12, s6, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[23:24], v12, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v24
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], v12, v[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v23
 ; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
 ; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
 ; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v4, s6
 ; GFX9-O0-NEXT:    v_sub_u32_e64 v5, v4, s6
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[23:24], v5, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[22:23], v5, v[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b32 s6, 0
 ; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v4, s6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v21
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[6:7]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v22
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v20
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[19:20]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[18:19]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v5
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s7
@@ -3245,12 +3209,12 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s4
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v17, vcc, v15, v17, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s4
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s5
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s4
@@ -3262,7 +3226,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v16
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -3275,10 +3239,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, s7
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s4, 8
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s5, 9
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 8
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 9
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -3307,163 +3271,155 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB1_6
 ; GFX9-O0-NEXT:  .LBB1_8: ; %udiv-bb1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s6
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s7
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s6
 ; GFX9-O0-NEXT:    s_mov_b32 s9, s7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v9, vcc, v4, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s4
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s9
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v8, vcc, v3, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s4
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 0x7f
-; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s4, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[5:6], v3, v[11:12]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v6
+; GFX9-O0-NEXT:    v_sub_u32_e64 v2, s4, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[10:11]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 64
-; GFX9-O0-NEXT:    v_sub_u32_e64 v14, s4, v3
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[14:15], v14, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v15
-; GFX9-O0-NEXT:    v_or_b32_e64 v13, v13, v16
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v14
-; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v6
-; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v3, s4
+; GFX9-O0-NEXT:    v_sub_u32_e64 v13, s4, v2
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[13:14], v13, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
+; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v15
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
+; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v5
+; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v2, s4
 ; GFX9-O0-NEXT:    s_mov_b32 s10, 63
-; GFX9-O0-NEXT:    v_sub_u32_e64 v4, s10, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[13:14], v4, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v15, s[4:5]
+; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s10, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[12:13], v3, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b32 s10, 0
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v3, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v15, s[10:11]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[10:11]
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v2, s10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[10:11]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[10:11]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[7:8], v3, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v4, v7, s[4:5]
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[6:7], v2, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s9
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v3, v6, s[4:5]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v3
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
-; GFX9-O0-NEXT:    v_or_b32_e64 v3, v3, v4
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v1, v1, v2
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7]
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
+; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s7
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s7
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
 ; GFX9-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s6, 6
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s7, 7
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 6
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 7
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-O0-NEXT:    s_cbranch_execz .LBB1_5
 ; GFX9-O0-NEXT:    s_branch .LBB1_7
 ; GFX9-O0-NEXT:  .LBB1_9: ; %udiv-end
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 32
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[7:8]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[6:7]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[2:3], s4, v[5:6]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[2:3], s4, v[4:5]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT:    ; kill: killed $vgpr4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_nop 0
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -3653,83 +3609,94 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0:       ; %bb.0: ; %_udiv-special-cases
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-G-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-G-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v3
-; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v4
-; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    s_nop 0
-; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v5
-; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v7
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v3
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v8
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v7
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
+; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v9
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v14
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v9, v12
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v11, v10, v11
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[6:7], v[9:10], v[11:12]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v13
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v8, v8, v11
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v9, v10
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[6:7], v[8:9], v[10:11]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v3
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v9
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v14
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v9, v12
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v11, v10, v11
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v13
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v8, v8, v11
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v9, v10
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], v[10:11]
 ; GFX9-G-O0-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s5
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], v[4:5]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v10
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v4, v4
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v5, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 32
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v5, v5, v6
+; GFX9-G-O0-NEXT:    v_min_u32_e64 v4, v4, v5
+; GFX9-G-O0-NEXT:    s_mov_b32 s10, 64
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s10
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v5, v4, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v8
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v4, v4
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 32
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v6, v7
+; GFX9-G-O0-NEXT:    v_min_u32_e64 v4, v4, v6
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[8:9]
+; GFX9-G-O0-NEXT:    s_mov_b32 s14, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v3
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v2
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s4
 ; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], v[5:6]
@@ -3740,7 +3707,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 32
 ; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v6, v7
 ; GFX9-G-O0-NEXT:    v_min_u32_e64 v5, v5, v6
-; GFX9-G-O0-NEXT:    s_mov_b32 s10, 64
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s10
 ; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v5, v6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
@@ -3751,130 +3717,106 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v7, v8
 ; GFX9-G-O0-NEXT:    v_min_u32_e64 v5, v5, v7
 ; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
-; GFX9-G-O0-NEXT:    s_mov_b32 s14, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v4
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], v[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v12
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v7, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, 32
-; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v7, v8
-; GFX9-G-O0-NEXT:    v_min_u32_e64 v6, v6, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s10
-; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v6, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v10
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v8, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, 32
-; GFX9-G-O0-NEXT:    v_add_u32_e64 v8, v8, v9
-; GFX9-G-O0-NEXT:    v_min_u32_e64 v6, v6, v8
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[8:9]
 ; GFX9-G-O0-NEXT:    s_mov_b32 s13, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s11, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s12, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, 0
-; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v6, s[8:9], v5, v6
-; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s14
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s14
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v7, s[8:9], v5, v7, s[8:9]
-; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s12
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v9, s[8:9], v5, v8, s[8:9]
-; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s10
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v8, s[8:9], v5, v8, s[8:9]
-; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v5, s[8:9], v4, v5
+; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s14
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s14
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v6, s[8:9], v4, v6, s[8:9]
+; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s12
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v8, s[8:9], v4, v7, s[8:9]
+; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s11
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s10
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v7, s[8:9], v4, v7, s[8:9]
+; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s4
-; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[10:11], v[12:13], v[14:15]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[12:13], v[14:15]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s12
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s13
-; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[12:13], v[10:11], v[12:13]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s4
+; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[10:11], v[11:12], v[13:14]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[11:12], v[13:14]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s13
+; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[12:13], v[9:10], v[11:12]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, 1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v4, v9, s[12:13]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, 1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v10, v5, v10, s[12:13]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, 1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v10, v5, v10, s[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, 1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[6:7]
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v5, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[10:11]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v4, v9, s[8:9]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, 1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v9
 ; GFX9-G-O0-NEXT:    s_mov_b32 s7, 0x7f
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v6, v6, s7
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v7, v7, s6
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v6, v6, v9
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v8, v7, v8
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[6:7], v[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v2
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v5, v5, s7
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v6, v6, s6
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v5, v8
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v7, v6, v7
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[5:6], v[7:8]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v3
-; GFX9-G-O0-NEXT:    v_and_b32_e32 v1, 1, v5
-; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v2
+; GFX9-G-O0-NEXT:    v_and_b32_e32 v0, 1, v4
+; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v7
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[6:7]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-G-O0-NEXT:    v_and_b32_e32 v3, 1, v5
-; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v3
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 1
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[6:7]
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-G-O0-NEXT:    v_and_b32_e32 v2, 1, v4
+; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v2
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v5, v6
-; GFX9-G-O0-NEXT:    v_and_b32_e32 v5, 1, v5
-; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[6:7]
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v2
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, 0
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v5
+; GFX9-G-O0-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v4
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], -1
 ; GFX9-G-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9-G-O0-NEXT:    v_writelane_b32 v0, s4, 0
-; GFX9-G-O0-NEXT:    v_writelane_b32 v0, s5, 1
+; GFX9-G-O0-NEXT:    ; implicit-def: $vgpr34 : SGPR spill to VGPR lane
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s4, 0
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s5, 1
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-G-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
@@ -3882,61 +3824,61 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_branch .LBB1_8
 ; GFX9-G-O0-NEXT:  .LBB1_1: ; %Flow
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v0, 2
-; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v0, 3
+; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 2
+; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 3
 ; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-G-O0-NEXT:  ; %bb.2: ; %Flow
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(4)
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_branch .LBB1_5
 ; GFX9-G-O0-NEXT:  .LBB1_3: ; %Flow2
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v4, 0
-; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v4, 1
+; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 0
+; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 1
 ; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_branch .LBB1_9
 ; GFX9-G-O0-NEXT:  .LBB1_4: ; %udiv-loop-exit
-; GFX9-G-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v5
@@ -3985,64 +3927,64 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_branch .LBB1_3
 ; GFX9-G-O0-NEXT:  .LBB1_5: ; %Flow1
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v8, 4
-; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v8, 5
+; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 4
+; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 5
 ; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_branch .LBB1_4
 ; GFX9-G-O0-NEXT:  .LBB1_6: ; %udiv-do-while
 ; GFX9-G-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_readlane_b32 s6, v16, 6
-; GFX9-G-O0-NEXT:    v_readlane_b32 s7, v16, 7
-; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    v_readlane_b32 s6, v34, 6
+; GFX9-G-O0-NEXT:    v_readlane_b32 s7, v34, 7
+; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(16)
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v2
@@ -4051,7 +3993,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, 1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[21:22], v2, v[0:1]
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[20:21], v2, v[0:1]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[3:4]
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
@@ -4075,8 +4017,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v3, v0, v1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s9, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v21
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v21
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v2, v3
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v0, v1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v12
@@ -4084,7 +4026,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v14
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v15
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[23:24], v0, v[2:3]
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[22:23], v0, v[2:3]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
 ; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v0, v[12:13]
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr2 killed $exec
@@ -4097,20 +4039,20 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v1
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(8)
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v28, v30
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v29, v31
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v30, v32
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v32
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v33
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v34
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v29
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v30
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v23
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v24
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v28
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v29
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v23
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v15
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v13, v1, v13
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v21
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v21
 ; GFX9-G-O0-NEXT:    v_or3_b32 v12, v12, v14, v15
 ; GFX9-G-O0-NEXT:    v_or3_b32 v2, v2, v3, v13
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
@@ -4140,18 +4082,18 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v11
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v24
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, v25
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v24, v26
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v26
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v27
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v28
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v23
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v24
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v23
 ; GFX9-G-O0-NEXT:    v_and_b32_e64 v11, v8, v11
 ; GFX9-G-O0-NEXT:    v_and_b32_e64 v10, v8, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v21
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v21
 ; GFX9-G-O0-NEXT:    v_and_b32_e64 v8, v6, v8
-; GFX9-G-O0-NEXT:    v_and_b32_e64 v6, v6, v21
+; GFX9-G-O0-NEXT:    v_and_b32_e64 v6, v6, v20
 ; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v4, s[8:9], v4, v11
 ; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9]
 ; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9]
@@ -4160,347 +4102,338 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v10
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v9
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v17
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v18
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v19
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v16
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v17
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v18
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v19
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s12, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s11, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, -1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, s8
-; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v17, s[8:9], v11, v17
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, s8
+; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v16, s[8:9], v11, v16
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s12
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v18, s[8:9], v10, v11, s[8:9]
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s11
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v20, s[8:9], v9, v10, s[8:9]
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, s10
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v19, s[8:9], v8, v9, s[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v17
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v18
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v20
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v19
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v17, v17, v20
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v19, v18, v19
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v19
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[17:18], v[19:20]
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v17
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v19
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v18
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v16, v16, v19
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v18, v17, v18
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v18
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19]
 ; GFX9-G-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v0
-; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v3
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v0
+; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v15
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v14
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v12
-; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v15
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v14
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v12
+; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-G-O0-NEXT:    v_writelane_b32 v16, s6, 2
-; GFX9-G-O0-NEXT:    v_writelane_b32 v16, s7, 3
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 2
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 3
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-G-O0-NEXT:    v_writelane_b32 v16, s6, 6
-; GFX9-G-O0-NEXT:    v_writelane_b32 v16, s7, 7
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 6
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 7
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_andn2_b64 exec, exec, s[4:5]
 ; GFX9-G-O0-NEXT:    s_cbranch_execnz .LBB1_6
 ; GFX9-G-O0-NEXT:    s_branch .LBB1_1
 ; GFX9-G-O0-NEXT:  .LBB1_7: ; %udiv-preheader
-; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b32 s4, 64
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v4
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s4
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v4, v13, v4
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v4, v12, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s4
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v5, v5, v13
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v5, v5, v12
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
-; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v13, v6
+; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v12, v6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s6
-; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v13, v6
-; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[6:7], v13, v[21:22]
-; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[26:27], v13, v[15:16]
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[24:25], v5, v[21:22]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v26
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v27
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, v24
+; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v12, v6
+; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[6:7], v12, v[20:21]
+; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[25:26], v12, v[14:15]
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[23:24], v5, v[20:21]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v25
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v14, v14, v23
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v13, v5, v13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v26
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v23
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v24
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v13, v13, v22
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v12, v5, v12
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[8:9], 0
-; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[21:22], v4, v[21:22]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v21
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v22
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v14, s[4:5]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[4:5]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v15
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v16
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v14, s[6:7]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v13, v5, v13, s[6:7]
+; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[20:21], v4, v[20:21]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v21
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v13, s[4:5]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[4:5]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v14
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v15
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v13, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v12, v5, v12, s[6:7]
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v6
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[4:5]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v12, v12, v13, s[4:5]
 ; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v6
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v6
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v14
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v17
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v18
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v19
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v16
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v17
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v18
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v19
 ; GFX9-G-O0-NEXT:    s_mov_b32 s4, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s7, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, -1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, s4
-; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v16, s[4:5], v16, v17
-; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, s10
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v15, s[4:5], v15, v16, s[4:5]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, s4
+; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v15, s[4:5], v15, v16
 ; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s10
 ; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5]
 ; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s7
 ; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5]
 ; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s6
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5]
+; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], s[8:9]
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[8:9]
-; GFX9-G-O0-NEXT:    v_writelane_b32 v12, s8, 6
-; GFX9-G-O0-NEXT:    v_writelane_b32 v12, s9, 7
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s8, 6
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s9, 7
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s7
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s4
-; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_branch .LBB1_6
 ; GFX9-G-O0-NEXT:  .LBB1_8: ; %udiv-bb1
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
+; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s9, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s6
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v5, s[6:7], v2, v5
-; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s10
+; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v4, s[6:7], v1, v4
+; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s10
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v4, v6, s[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s9
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v8, s[6:7], v3, v4, s[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s8
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v7, s[6:7], v1, v3, s[6:7]
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v5, s[6:7], v3, v5, s[6:7]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v7, s[6:7], v2, v3, s[6:7]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v0, v2, s[6:7]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v7
-; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v6
+; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0x7f
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v4, s[6:7], v1, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v3, s[6:7], v0, v1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s7, 64
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v10
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v3, v4, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v9, v1, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v2, v3, v0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v8, v0, v3
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[8:9], v4, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v4, v1
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[1:2], v4, v[13:14]
-; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[18:19], v9, v[13:14]
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[16:17], v4, v[11:12]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v18
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v19
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v16
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[8:9], v3, v0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v3, v0
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v3, v[12:13]
+; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[17:18], v8, v[12:13]
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[15:16], v3, v[10:11]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v17
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v10, v15
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v9
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[13:14], v3, v[13:14]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v18
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v15
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v9, v14
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v3, v3, v8
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[12:13], v2, v[12:13]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s[8:9]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[8:9]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v14
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v9, v10, s[8:9]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v12
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v9, v10, s[6:7]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v3
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v8, s[8:9]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[8:9]
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v12
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v13
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[8:9]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v11
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[6:7]
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v8
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[8:9], s[4:5]
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[10:11], s[4:5]
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v5, v8
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v7, v6, v7
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s4
-; GFX9-G-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[5:6], v[7:8]
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v7
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v6, v5, v6
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
+; GFX9-G-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7]
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s11
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s11
+; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX9-G-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
 ; GFX9-G-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT:    v_writelane_b32 v0, s6, 4
-; GFX9-G-O0-NEXT:    v_writelane_b32 v0, s7, 5
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 4
+; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 5
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-G-O0-NEXT:    s_cbranch_execz .LBB1_5
 ; GFX9-G-O0-NEXT:    s_branch .LBB1_7
 ; GFX9-G-O0-NEXT:  .LBB1_9: ; %udiv-end
-; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v8
-; GFX9-G-O0-NEXT:    ; kill: killed $vgpr4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v3
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v6
 ; GFX9-G-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    s_nop 0
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-G-O0-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
index 3bf7e7b8c5696..6eb229f0d0e1c 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
@@ -25,13 +25,12 @@ body:             |
     ; GCN-LABEL: name: test_single_block
     ; GCN: liveins: $sgpr4, $vgpr2_vgpr3
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
+    ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF
+    ; GCN-NEXT: renamable $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63
     ; GCN-NEXT: S_NOP 0
-    ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
-    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
-    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
-    ; GCN-NEXT: KILL killed renamable $vgpr0
+    ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
+    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
+    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_]], 0, 0, implicit $exec
     ; GCN-NEXT: SI_RETURN
     SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
     S_NOP 0
@@ -63,32 +62,31 @@ body:             |
   ; GCN-NEXT:   successors: %bb.1(0x80000000)
   ; GCN-NEXT:   liveins: $sgpr6, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr0 = IMPLICIT_DEF
   ; GCN-NEXT:   S_BRANCH %bb.1
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.1:
   ; GCN-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
-  ; GCN-NEXT:   liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
+  ; GCN-NEXT:   liveins: $sgpr6, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
+  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
   ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.2:
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
-  ; GCN-NEXT:   liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
+  ; GCN-NEXT:   liveins: $sgpr6, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr0
+  ; GCN-NEXT:   renamable $vgpr63 = IMPLICIT_DEF
+  ; GCN-NEXT:   renamable $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr63
   ; GCN-NEXT:   S_NOP 0
-  ; GCN-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
-  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
+  ; GCN-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
+  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
   ; GCN-NEXT:   S_BRANCH %bb.3
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.3:
-  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr10_sgpr11
+  ; GCN-NEXT:   liveins: $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
+  ; GCN-NEXT:   $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
   ; GCN-NEXT:   S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
-  ; GCN-NEXT:   KILL killed renamable $vgpr0
   ; GCN-NEXT:   SI_RETURN
   bb.0:
     liveins: $sgpr6, $sgpr10_sgpr11
@@ -135,52 +133,50 @@ body:             |
   ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
   ; GCN-NEXT:   liveins: $sgpr4, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr0 = IMPLICIT_DEF
-  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
+  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
   ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.1:
   ; GCN-NEXT:   successors: %bb.2(0x80000000)
-  ; GCN-NEXT:   liveins: $sgpr4, $vgpr0, $sgpr10_sgpr11
+  ; GCN-NEXT:   liveins: $sgpr4, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
+  ; GCN-NEXT:   renamable $vgpr63 = IMPLICIT_DEF
+  ; GCN-NEXT:   renamable $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63
   ; GCN-NEXT:   S_NOP 0
-  ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
-  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
+  ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
+  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
   ; GCN-NEXT:   S_BRANCH %bb.2
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.2:
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
-  ; GCN-NEXT:   liveins: $sgpr4, $vgpr0, $vgpr1, $sgpr10_sgpr11
+  ; GCN-NEXT:   liveins: $sgpr4, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   S_STORE_DWORD_IMM $sgpr4, $sgpr10_sgpr11, 0, 0
-  ; GCN-NEXT:   $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
+  ; GCN-NEXT:   $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
   ; GCN-NEXT:   S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 4
-  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 5, implicit $exec
+  ; GCN-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
   ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
   ; GCN-NEXT:   S_BRANCH %bb.3
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.3:
   ; GCN-NEXT:   successors: %bb.5(0x40000000), %bb.4(0x40000000)
-  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $vcc = V_CMP_EQ_U32_e64 0, $vgpr1, implicit $exec
+  ; GCN-NEXT:   $vcc = V_CMP_EQ_U32_e64 0, [[V_MOV_B32_e32_1]], implicit $exec
   ; GCN-NEXT:   $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.5, implicit $scc
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.4:
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
-  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr6_sgpr7
+  ; GCN-NEXT:   liveins: $sgpr6_sgpr7
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr1 = V_SUB_U32_e32 1, killed $vgpr1, implicit $exec
-  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+  ; GCN-NEXT:   [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 1, [[V_MOV_B32_e32_1]], implicit $exec
+  ; GCN-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_SUB_U32_e32_]], implicit $exec
   ; GCN-NEXT:   S_BRANCH %bb.3
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.5:
-  ; GCN-NEXT:   liveins: $vgpr0, $sgpr6_sgpr7
+  ; GCN-NEXT:   liveins: $sgpr6_sgpr7
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $exec = S_OR_B64 $exec, $sgpr6_sgpr7, implicit-def $scc
-  ; GCN-NEXT:   KILL killed renamable $vgpr0
   ; GCN-NEXT:   SI_RETURN
   bb.0:
     liveins: $sgpr4, $sgpr10_sgpr11
@@ -239,26 +235,24 @@ body:             |
   ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
   ; GCN-NEXT:   liveins: $sgpr4, $vgpr2_vgpr3
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr0 = IMPLICIT_DEF
   ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.1:
-  ; GCN-NEXT:   liveins: $sgpr4, $vgpr0, $vgpr2_vgpr3
+  ; GCN-NEXT:   liveins: $sgpr4, $vgpr2_vgpr3
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
+  ; GCN-NEXT:   renamable $vgpr63 = IMPLICIT_DEF
+  ; GCN-NEXT:   renamable $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63
   ; GCN-NEXT:   S_NOP 0
-  ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
-  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
-  ; GCN-NEXT:   GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
-  ; GCN-NEXT:   KILL killed renamable $vgpr0
+  ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
+  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+  ; GCN-NEXT:   GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_]], 0, 0, implicit $exec
   ; GCN-NEXT:   SI_RETURN
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.2:
-  ; GCN-NEXT:   liveins: $vgpr0, $vgpr2_vgpr3
+  ; GCN-NEXT:   liveins: $vgpr2_vgpr3
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
-  ; GCN-NEXT:   GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
-  ; GCN-NEXT:   KILL killed renamable $vgpr0
+  ; GCN-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
+  ; GCN-NEXT:   GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
   ; GCN-NEXT:   SI_RETURN
   bb.0:
     liveins: $sgpr4, $vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index b9583a73295e2..66af18ac7b167 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -2,6 +2,8 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLAT_SCR_OPT %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLAT_SCR_ARCH %s
 
+; XFAIL: *
+
 declare void @extern_func() #0
 
 define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
index ba619a659f1b0..b3f0c1f6dd80e 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
@@ -12,10 +12,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_lo
-    ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: S_NOP 0, implicit-def $exec_lo
     ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
-    ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
@@ -37,10 +39,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_hi
-    ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: S_NOP 0, implicit-def $exec_hi
     ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_hi
-    ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
@@ -62,11 +66,13 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec
-    ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: S_NOP 0, implicit-def $exec
     ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
-    ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
-    ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
+    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
     ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1
@@ -93,9 +99,11 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_lo
-    ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo
-    ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
@@ -116,9 +124,11 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_hi
-    ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi
-    ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
@@ -139,10 +149,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec
-    ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
-    ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
-    ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
+    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
     ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
index 1c7896fcb4f14..a7529efc78446 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
@@ -13,10 +13,12 @@ body:             |
   bb.0:
 
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_m0
-    ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: S_NOP 0, implicit-def $m0
     ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $m0
-    ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
@@ -43,9 +45,11 @@ body:             |
   bb.0:
 
     ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_m0
-    ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
     ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0
-    ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index 3b078c41f4a84..7d07641f455e3 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -2635,7 +2635,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX9-NEXT:    s_add_i32 s33, s32, 0x7fc0
 ; GFX9-NEXT:    s_and_b32 s33, s33, 0xffff8000
 ; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GFX9-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT:    s_add_i32 s32, s32, 0x28000
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
@@ -2775,25 +2775,25 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX9-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:796
 ; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:516
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:520
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:524
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:528
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:532
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:536
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:540
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill
+; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:544
 ; GFX9-NEXT:    s_nop 0
 ; GFX9-NEXT:    buffer_load_dword v10, off, s[0:3], s33 offset:548
@@ -2861,13 +2861,13 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX9-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:152
 ; GFX9-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:156
 ; GFX9-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:160
-; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:1540 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:1544 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:1548 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:1552 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:1556 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:1560 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:1564 ; 4-byte Folded Reload
+; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:1536 ; 4-byte Folded Reload
+; GFX9-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:1540 ; 4-byte Folded Reload
+; GFX9-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:1544 ; 4-byte Folded Reload
+; GFX9-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:1548 ; 4-byte Folded Reload
+; GFX9-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:1552 ; 4-byte Folded Reload
+; GFX9-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:1556 ; 4-byte Folded Reload
+; GFX9-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:1560 ; 4-byte Folded Reload
 ; GFX9-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
 ; GFX9-NEXT:    v_add_u32_e32 v0, 0x400, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 42
@@ -2890,7 +2890,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX9-NEXT:    v_readlane_b32 s31, v63, 1
 ; GFX9-NEXT:    v_readlane_b32 s30, v63, 0
 ; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GFX9-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Reload
+; GFX9-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:1564 ; 4-byte Folded Reload
 ; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT:    s_add_i32 s32, s32, 0xfffd8000
 ; GFX9-NEXT:    s_mov_b32 s33, s36
@@ -2904,7 +2904,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX10-NEXT:    s_add_i32 s33, s32, 0x3fe0
 ; GFX10-NEXT:    s_and_b32 s33, s33, 0xffffc000
 ; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
-; GFX10-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill
+; GFX10-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:1568 ; 4-byte Folded Spill
 ; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT:    s_mov_b32 exec_lo, s34
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0
@@ -3046,28 +3046,28 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX10-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:796
 ; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:516
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill
+; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:520
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill
+; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:524
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill
+; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:528
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill
+; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:532
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill
+; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:536
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill
+; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:540
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill
+; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:544
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1568 ; 4-byte Folded Spill
+; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill
 ; GFX10-NEXT:    s_clause 0x15
 ; GFX10-NEXT:    buffer_load_dword v10, off, s[0:3], s33 offset:548
 ; GFX10-NEXT:    buffer_load_dword v11, off, s[0:3], s33 offset:552
@@ -3134,14 +3134,14 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX10-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:156
 ; GFX10-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:160
 ; GFX10-NEXT:    s_clause 0x7
-; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:1540
-; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:1544
-; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:1548
-; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:1552
-; GFX10-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:1556
-; GFX10-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:1560
-; GFX10-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:1564
-; GFX10-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:1568
+; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:1536
+; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:1540
+; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:1544
+; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:1548
+; GFX10-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:1552
+; GFX10-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:1556
+; GFX10-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:1560
+; GFX10-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:1564
 ; GFX10-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 42
 ; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x400, v0
@@ -3165,7 +3165,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX10-NEXT:    v_readlane_b32 s31, v63, 1
 ; GFX10-NEXT:    v_readlane_b32 s30, v63, 0
 ; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
-; GFX10-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Reload
+; GFX10-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:1568 ; 4-byte Folded Reload
 ; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT:    s_mov_b32 exec_lo, s34
 ; GFX10-NEXT:    s_add_i32 s32, s32, 0xfffec000
@@ -3181,7 +3181,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_and_b32 s33, s33, 0xfffffe00
 ; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
-; GFX11-NEXT:    scratch_store_b32 off, v60, s33 offset:1536 ; 4-byte Folded Spill
+; GFX11-NEXT:    scratch_store_b32 off, v60, s33 offset:1600 ; 4-byte Folded Spill
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX11-NEXT:    s_mov_b32 s0, 0
 ; GFX11-NEXT:    v_mov_b32_e32 v4, 0
@@ -3267,7 +3267,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX11-NEXT:    s_waitcnt vmcnt(2)
 ; GFX11-NEXT:    v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v1, v4
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    scratch_store_b128 off, v[16:19], s33 offset:1588 ; 16-byte Folded Spill
+; GFX11-NEXT:    scratch_store_b128 off, v[16:19], s33 offset:1584 ; 16-byte Folded Spill
 ; GFX11-NEXT:    s_clause 0x3
 ; GFX11-NEXT:    scratch_load_b128 v[16:19], off, s33 offset:528
 ; GFX11-NEXT:    scratch_load_b128 v[20:23], off, s33 offset:544
@@ -3277,13 +3277,13 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX11-NEXT:    s_waitcnt vmcnt(2)
 ; GFX11-NEXT:    v_mov_b32_e32 v10, v21
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    scratch_store_b128 off, v[28:31], s33 offset:1572 ; 16-byte Folded Spill
+; GFX11-NEXT:    scratch_store_b128 off, v[28:31], s33 offset:1568 ; 16-byte Folded Spill
 ; GFX11-NEXT:    scratch_load_b128 v[28:31], off, s33 offset:592
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    scratch_store_b128 off, v[28:31], s33 offset:1556 ; 16-byte Folded Spill
+; GFX11-NEXT:    scratch_store_b128 off, v[28:31], s33 offset:1552 ; 16-byte Folded Spill
 ; GFX11-NEXT:    scratch_load_b128 v[28:31], off, s33 offset:608
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    scratch_store_b128 off, v[28:31], s33 offset:1540 ; 16-byte Folded Spill
+; GFX11-NEXT:    scratch_store_b128 off, v[28:31], s33 offset:1536 ; 16-byte Folded Spill
 ; GFX11-NEXT:    scratch_store_b128 off, v[32:35], s32
 ; GFX11-NEXT:    v_dual_mov_b32 v31, v47 :: v_dual_mov_b32 v32, v36
 ; GFX11-NEXT:    v_dual_mov_b32 v33, v48 :: v_dual_mov_b32 v34, v49
@@ -3333,13 +3333,13 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX11-NEXT:    scratch_store_b128 off, v[48:51], s2
 ; GFX11-NEXT:    s_add_i32 s2, s32, 16
 ; GFX11-NEXT:    scratch_store_b128 off, v[32:35], s2
-; GFX11-NEXT:    scratch_load_b128 v[1:4], off, s33 offset:1588 ; 16-byte Folded Reload
+; GFX11-NEXT:    scratch_load_b128 v[1:4], off, s33 offset:1584 ; 16-byte Folded Reload
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_mov_b32_e32 v1, 42
 ; GFX11-NEXT:    s_clause 0x2
-; GFX11-NEXT:    scratch_load_b128 v[17:20], off, s33 offset:1572
-; GFX11-NEXT:    scratch_load_b128 v[21:24], off, s33 offset:1556
-; GFX11-NEXT:    scratch_load_b128 v[25:28], off, s33 offset:1540
+; GFX11-NEXT:    scratch_load_b128 v[17:20], off, s33 offset:1568
+; GFX11-NEXT:    scratch_load_b128 v[21:24], off, s33 offset:1552
+; GFX11-NEXT:    scratch_load_b128 v[25:28], off, s33 offset:1536
 ; GFX11-NEXT:    s_add_i32 s2, s33, 0x400
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
@@ -3360,7 +3360,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX11-NEXT:    v_readlane_b32 s31, v60, 1
 ; GFX11-NEXT:    v_readlane_b32 s30, v60, 0
 ; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
-; GFX11-NEXT:    scratch_load_b32 v60, off, s33 offset:1536 ; 4-byte Folded Reload
+; GFX11-NEXT:    scratch_load_b32 v60, off, s33 offset:1600 ; 4-byte Folded Reload
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX11-NEXT:    s_addk_i32 s32, 0xf600
 ; GFX11-NEXT:    s_mov_b32 s33, s34
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index 6555ceb3ed338..75089b2a8760c 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -705,8 +705,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -1909,8 +1909,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -3113,8 +3113,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -3833,8 +3833,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -5036,8 +5036,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index 6548792180a0e..c784489fb43b3 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -771,8 +771,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0xff800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -1950,8 +1950,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0xff800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -3129,8 +3129,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0xff800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index 6936cdc4d379a..70e8c94475d31 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -771,8 +771,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -1950,8 +1950,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -3129,8 +3129,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
index 5cb57703c01d9..ce23280e44c46 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -783,8 +783,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -2013,8 +2013,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -3243,8 +3243,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -4015,8 +4015,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -5244,8 +5244,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir
index 742498cdd8bd1..c76a84cb1c5d4 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir
@@ -21,14 +21,10 @@ body:             |
     ; CHECK-LABEL: name: split_instruction_subranges
     ; CHECK: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %1:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
     ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
-    ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1)
-    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
-    ; CHECK-NEXT: S_NOP 0, implicit [[COPY]].sub1
-    ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
-    ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V64_RESTORE]].sub0
-    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1
-    ; CHECK-NEXT: S_NOP 0, implicit [[COPY1]].sub1
+    ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
+    ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR1]].sub0
+    ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1
     ; CHECK-NEXT: S_ENDPGM 0
     %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
     %2:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
@@ -61,23 +57,13 @@ body:             |
     ; CHECK-LABEL: name: split_instruction_subranges_use_is_subreg_def
     ; CHECK: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %1:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
     ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
-    ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
     ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1)
-    ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR2]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
-    ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
-    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0
-    ; CHECK-NEXT: S_NOP 0, implicit-def [[COPY]].sub1
-    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:vreg_64 = COPY [[COPY]].sub0
-    ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE1:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
-    ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub1:vreg_64 = COPY [[SI_SPILL_V64_RESTORE1]].sub1
-    ; CHECK-NEXT: S_NOP 0, implicit-def [[COPY2]].sub0
-    ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub1:vreg_64 = COPY [[COPY2]].sub1
+    ; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR1]].sub1
+    ; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub0
     ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
-    ; CHECK-NEXT: undef [[COPY4:%[0-9]+]].sub0:vreg_64 = COPY [[COPY1]].sub0
-    ; CHECK-NEXT: S_NOP 0, implicit [[COPY4]].sub0
-    ; CHECK-NEXT: undef [[COPY5:%[0-9]+]].sub1:vreg_64 = COPY [[COPY3]].sub1
-    ; CHECK-NEXT: S_NOP 0, implicit [[COPY5]].sub1
+    ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR1]].sub0
+    ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1
     ; CHECK-NEXT: S_ENDPGM 0
     %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
     %2:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index 297b5180dfe9b..10c88f35d204a 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -6,209 +6,209 @@ define void @main(i1 %arg) #0 {
 ; CHECK:       ; %bb.0: ; %bb
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; CHECK-NEXT:    buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b64 exec, s[4:5]
-; CHECK-NEXT:    v_writelane_b32 v8, s30, 0
-; CHECK-NEXT:    v_writelane_b32 v8, s31, 1
-; CHECK-NEXT:    v_writelane_b32 v8, s36, 2
-; CHECK-NEXT:    v_writelane_b32 v8, s37, 3
-; CHECK-NEXT:    v_writelane_b32 v8, s38, 4
-; CHECK-NEXT:    v_writelane_b32 v8, s39, 5
-; CHECK-NEXT:    v_writelane_b32 v8, s40, 6
-; CHECK-NEXT:    v_writelane_b32 v8, s41, 7
-; CHECK-NEXT:    v_writelane_b32 v8, s42, 8
-; CHECK-NEXT:    v_writelane_b32 v8, s43, 9
-; CHECK-NEXT:    v_writelane_b32 v8, s44, 10
-; CHECK-NEXT:    v_writelane_b32 v8, s45, 11
-; CHECK-NEXT:    v_writelane_b32 v8, s46, 12
-; CHECK-NEXT:    v_writelane_b32 v8, s47, 13
-; CHECK-NEXT:    v_writelane_b32 v8, s48, 14
-; CHECK-NEXT:    v_writelane_b32 v8, s49, 15
+; CHECK-NEXT:    v_writelane_b32 v5, s30, 0
+; CHECK-NEXT:    v_writelane_b32 v5, s31, 1
+; CHECK-NEXT:    v_writelane_b32 v5, s36, 2
+; CHECK-NEXT:    v_writelane_b32 v5, s37, 3
+; CHECK-NEXT:    v_writelane_b32 v5, s38, 4
+; CHECK-NEXT:    v_writelane_b32 v5, s39, 5
+; CHECK-NEXT:    v_writelane_b32 v5, s40, 6
+; CHECK-NEXT:    v_writelane_b32 v5, s41, 7
+; CHECK-NEXT:    v_writelane_b32 v5, s42, 8
+; CHECK-NEXT:    v_writelane_b32 v5, s43, 9
+; CHECK-NEXT:    v_writelane_b32 v5, s44, 10
+; CHECK-NEXT:    v_writelane_b32 v5, s45, 11
+; CHECK-NEXT:    v_writelane_b32 v5, s46, 12
+; CHECK-NEXT:    v_writelane_b32 v5, s47, 13
+; CHECK-NEXT:    v_writelane_b32 v5, s48, 14
+; CHECK-NEXT:    v_writelane_b32 v5, s49, 15
 ; CHECK-NEXT:    s_getpc_b64 s[24:25]
-; CHECK-NEXT:    v_writelane_b32 v8, s50, 16
+; CHECK-NEXT:    v_writelane_b32 v5, s50, 16
 ; CHECK-NEXT:    s_movk_i32 s4, 0xf0
 ; CHECK-NEXT:    s_mov_b32 s5, s24
-; CHECK-NEXT:    v_writelane_b32 v8, s51, 17
+; CHECK-NEXT:    v_writelane_b32 v5, s51, 17
 ; CHECK-NEXT:    s_load_dwordx16 s[36:51], s[4:5], 0x0
-; CHECK-NEXT:    ; implicit-def: $vgpr4 : SGPR spill to VGPR lane
+; CHECK-NEXT:    ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    s_mov_b64 s[4:5], 0
 ; CHECK-NEXT:    s_load_dwordx4 s[28:31], s[4:5], 0x0
 ; CHECK-NEXT:    s_movk_i32 s4, 0x130
 ; CHECK-NEXT:    s_mov_b32 s5, s24
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    v_writelane_b32 v4, s36, 0
-; CHECK-NEXT:    v_writelane_b32 v4, s37, 1
-; CHECK-NEXT:    v_writelane_b32 v4, s38, 2
-; CHECK-NEXT:    v_writelane_b32 v4, s39, 3
-; CHECK-NEXT:    v_writelane_b32 v4, s40, 4
-; CHECK-NEXT:    v_writelane_b32 v4, s41, 5
-; CHECK-NEXT:    v_writelane_b32 v4, s42, 6
-; CHECK-NEXT:    v_writelane_b32 v4, s43, 7
-; CHECK-NEXT:    v_writelane_b32 v4, s44, 8
-; CHECK-NEXT:    v_writelane_b32 v4, s45, 9
-; CHECK-NEXT:    v_writelane_b32 v4, s46, 10
+; CHECK-NEXT:    v_writelane_b32 v7, s36, 0
+; CHECK-NEXT:    v_writelane_b32 v7, s37, 1
+; CHECK-NEXT:    v_writelane_b32 v7, s38, 2
+; CHECK-NEXT:    v_writelane_b32 v7, s39, 3
+; CHECK-NEXT:    v_writelane_b32 v7, s40, 4
+; CHECK-NEXT:    v_writelane_b32 v7, s41, 5
+; CHECK-NEXT:    v_writelane_b32 v7, s42, 6
+; CHECK-NEXT:    v_writelane_b32 v7, s43, 7
+; CHECK-NEXT:    v_writelane_b32 v7, s44, 8
+; CHECK-NEXT:    v_writelane_b32 v7, s45, 9
+; CHECK-NEXT:    v_writelane_b32 v7, s46, 10
 ; CHECK-NEXT:    s_load_dwordx16 s[4:19], s[4:5], 0x0
-; CHECK-NEXT:    v_writelane_b32 v4, s47, 11
-; CHECK-NEXT:    v_writelane_b32 v4, s48, 12
-; CHECK-NEXT:    v_writelane_b32 v4, s49, 13
+; CHECK-NEXT:    v_writelane_b32 v7, s47, 11
+; CHECK-NEXT:    v_writelane_b32 v7, s48, 12
 ; CHECK-NEXT:    s_mov_b32 s20, 0
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; CHECK-NEXT:    v_writelane_b32 v4, s50, 14
-; CHECK-NEXT:    v_mov_b32_e32 v5, s28
-; CHECK-NEXT:    v_mov_b32_e32 v6, v1
+; CHECK-NEXT:    v_writelane_b32 v7, s49, 13
+; CHECK-NEXT:    v_mov_b32_e32 v2, s28
+; CHECK-NEXT:    v_mov_b32_e32 v3, v1
 ; CHECK-NEXT:    s_mov_b32 s21, s20
 ; CHECK-NEXT:    s_mov_b32 s22, s20
 ; CHECK-NEXT:    s_mov_b32 s23, s20
-; CHECK-NEXT:    v_writelane_b32 v4, s51, 15
+; CHECK-NEXT:    v_writelane_b32 v7, s50, 14
+; CHECK-NEXT:    v_writelane_b32 v7, s51, 15
+; CHECK-NEXT:    image_sample_lz v3, v[2:3], s[44:51], s[20:23] dmask:0x1
 ; CHECK-NEXT:    v_mov_b32_e32 v2, v1
-; CHECK-NEXT:    image_sample_lz v5, v[5:6], s[44:51], s[20:23] dmask:0x1
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    v_writelane_b32 v4, s4, 16
-; CHECK-NEXT:    v_writelane_b32 v4, s5, 17
-; CHECK-NEXT:    v_writelane_b32 v4, s6, 18
-; CHECK-NEXT:    v_writelane_b32 v4, s7, 19
-; CHECK-NEXT:    v_writelane_b32 v4, s8, 20
-; CHECK-NEXT:    v_writelane_b32 v4, s9, 21
-; CHECK-NEXT:    image_sample_lz v6, v[1:2], s[4:11], s[20:23] dmask:0x1
-; CHECK-NEXT:    v_writelane_b32 v4, s10, 22
-; CHECK-NEXT:    v_writelane_b32 v4, s11, 23
-; CHECK-NEXT:    v_writelane_b32 v4, s12, 24
-; CHECK-NEXT:    v_writelane_b32 v4, s13, 25
-; CHECK-NEXT:    v_writelane_b32 v4, s14, 26
-; CHECK-NEXT:    v_writelane_b32 v4, s15, 27
-; CHECK-NEXT:    v_writelane_b32 v4, s16, 28
-; CHECK-NEXT:    v_writelane_b32 v8, s52, 18
-; CHECK-NEXT:    v_writelane_b32 v4, s17, 29
-; CHECK-NEXT:    v_writelane_b32 v8, s53, 19
-; CHECK-NEXT:    v_writelane_b32 v4, s18, 30
-; CHECK-NEXT:    v_writelane_b32 v8, s54, 20
-; CHECK-NEXT:    v_writelane_b32 v4, s19, 31
+; CHECK-NEXT:    v_writelane_b32 v7, s4, 16
+; CHECK-NEXT:    v_writelane_b32 v7, s5, 17
+; CHECK-NEXT:    v_writelane_b32 v7, s6, 18
+; CHECK-NEXT:    v_writelane_b32 v7, s7, 19
+; CHECK-NEXT:    v_writelane_b32 v7, s8, 20
+; CHECK-NEXT:    v_writelane_b32 v7, s9, 21
+; CHECK-NEXT:    image_sample_lz v4, v[1:2], s[4:11], s[20:23] dmask:0x1
+; CHECK-NEXT:    v_writelane_b32 v7, s10, 22
+; CHECK-NEXT:    v_writelane_b32 v7, s11, 23
+; CHECK-NEXT:    v_writelane_b32 v7, s12, 24
+; CHECK-NEXT:    v_writelane_b32 v7, s13, 25
+; CHECK-NEXT:    v_writelane_b32 v7, s14, 26
+; CHECK-NEXT:    v_writelane_b32 v7, s15, 27
+; CHECK-NEXT:    v_writelane_b32 v7, s16, 28
+; CHECK-NEXT:    v_writelane_b32 v5, s52, 18
+; CHECK-NEXT:    v_writelane_b32 v7, s17, 29
+; CHECK-NEXT:    v_writelane_b32 v5, s53, 19
+; CHECK-NEXT:    v_writelane_b32 v7, s18, 30
+; CHECK-NEXT:    v_writelane_b32 v5, s54, 20
+; CHECK-NEXT:    v_writelane_b32 v7, s19, 31
 ; CHECK-NEXT:    s_mov_b32 s4, 48
 ; CHECK-NEXT:    s_mov_b32 s5, s24
-; CHECK-NEXT:    v_writelane_b32 v8, s55, 21
+; CHECK-NEXT:    v_writelane_b32 v5, s55, 21
 ; CHECK-NEXT:    s_load_dwordx8 s[4:11], s[4:5], 0x0
-; CHECK-NEXT:    v_writelane_b32 v8, s56, 22
-; CHECK-NEXT:    v_writelane_b32 v8, s57, 23
-; CHECK-NEXT:    v_writelane_b32 v8, s58, 24
-; CHECK-NEXT:    v_writelane_b32 v8, s59, 25
-; CHECK-NEXT:    v_writelane_b32 v8, s60, 26
+; CHECK-NEXT:    v_writelane_b32 v5, s56, 22
+; CHECK-NEXT:    v_writelane_b32 v5, s57, 23
+; CHECK-NEXT:    v_writelane_b32 v5, s58, 24
+; CHECK-NEXT:    v_writelane_b32 v5, s59, 25
+; CHECK-NEXT:    v_writelane_b32 v5, s60, 26
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    v_writelane_b32 v4, s4, 32
-; CHECK-NEXT:    v_writelane_b32 v8, s61, 27
-; CHECK-NEXT:    v_writelane_b32 v4, s5, 33
-; CHECK-NEXT:    v_writelane_b32 v8, s62, 28
-; CHECK-NEXT:    v_writelane_b32 v4, s6, 34
-; CHECK-NEXT:    v_writelane_b32 v8, s63, 29
-; CHECK-NEXT:    v_writelane_b32 v4, s7, 35
-; CHECK-NEXT:    v_writelane_b32 v8, s64, 30
-; CHECK-NEXT:    v_writelane_b32 v4, s8, 36
-; CHECK-NEXT:    v_writelane_b32 v8, s65, 31
-; CHECK-NEXT:    v_writelane_b32 v4, s9, 37
-; CHECK-NEXT:    v_writelane_b32 v8, s66, 32
+; CHECK-NEXT:    v_writelane_b32 v7, s4, 32
+; CHECK-NEXT:    v_writelane_b32 v5, s61, 27
+; CHECK-NEXT:    v_writelane_b32 v7, s5, 33
+; CHECK-NEXT:    v_writelane_b32 v5, s62, 28
+; CHECK-NEXT:    v_writelane_b32 v7, s6, 34
+; CHECK-NEXT:    v_writelane_b32 v5, s63, 29
+; CHECK-NEXT:    v_writelane_b32 v7, s7, 35
+; CHECK-NEXT:    v_writelane_b32 v5, s64, 30
+; CHECK-NEXT:    v_writelane_b32 v7, s8, 36
+; CHECK-NEXT:    v_writelane_b32 v5, s65, 31
+; CHECK-NEXT:    v_writelane_b32 v7, s9, 37
+; CHECK-NEXT:    v_writelane_b32 v5, s66, 32
 ; CHECK-NEXT:    s_movk_i32 s26, 0x1f0
 ; CHECK-NEXT:    s_movk_i32 s28, 0x2f0
 ; CHECK-NEXT:    s_mov_b32 s27, s24
 ; CHECK-NEXT:    s_mov_b32 s29, s24
-; CHECK-NEXT:    v_writelane_b32 v4, s10, 38
-; CHECK-NEXT:    v_writelane_b32 v8, s67, 33
-; CHECK-NEXT:    v_writelane_b32 v4, s11, 39
+; CHECK-NEXT:    v_writelane_b32 v7, s10, 38
+; CHECK-NEXT:    v_writelane_b32 v5, s67, 33
+; CHECK-NEXT:    v_writelane_b32 v7, s11, 39
 ; CHECK-NEXT:    s_load_dwordx16 s[52:67], s[26:27], 0x0
 ; CHECK-NEXT:    s_load_dwordx16 s[4:19], s[28:29], 0x0
 ; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
 ; CHECK-NEXT:    s_xor_b64 s[24:25], vcc, -1
-; CHECK-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    v_mul_f32_e32 v0, v6, v5
+; CHECK-NEXT:    v_mul_f32_e32 v0, v4, v3
 ; CHECK-NEXT:    s_and_saveexec_b64 s[26:27], s[24:25]
 ; CHECK-NEXT:    s_xor_b64 s[26:27], exec, s[26:27]
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_3
 ; CHECK-NEXT:  ; %bb.1: ; %bb48
-; CHECK-NEXT:    v_readlane_b32 s36, v4, 0
-; CHECK-NEXT:    v_readlane_b32 s44, v4, 8
-; CHECK-NEXT:    v_readlane_b32 s45, v4, 9
-; CHECK-NEXT:    v_readlane_b32 s46, v4, 10
-; CHECK-NEXT:    v_readlane_b32 s47, v4, 11
-; CHECK-NEXT:    v_readlane_b32 s48, v4, 12
-; CHECK-NEXT:    v_readlane_b32 s49, v4, 13
-; CHECK-NEXT:    v_readlane_b32 s50, v4, 14
-; CHECK-NEXT:    v_readlane_b32 s51, v4, 15
+; CHECK-NEXT:    v_readlane_b32 s36, v7, 0
+; CHECK-NEXT:    v_readlane_b32 s44, v7, 8
+; CHECK-NEXT:    v_readlane_b32 s45, v7, 9
+; CHECK-NEXT:    v_readlane_b32 s46, v7, 10
+; CHECK-NEXT:    v_readlane_b32 s47, v7, 11
+; CHECK-NEXT:    v_readlane_b32 s48, v7, 12
+; CHECK-NEXT:    v_readlane_b32 s49, v7, 13
+; CHECK-NEXT:    v_readlane_b32 s50, v7, 14
+; CHECK-NEXT:    v_readlane_b32 s51, v7, 15
 ; CHECK-NEXT:    s_and_b64 vcc, exec, -1
-; CHECK-NEXT:    v_readlane_b32 s37, v4, 1
-; CHECK-NEXT:    v_readlane_b32 s38, v4, 2
-; CHECK-NEXT:    v_readlane_b32 s39, v4, 3
-; CHECK-NEXT:    v_readlane_b32 s40, v4, 4
-; CHECK-NEXT:    image_sample_lz v5, v[1:2], s[44:51], s[20:23] dmask:0x1
+; CHECK-NEXT:    v_readlane_b32 s37, v7, 1
+; CHECK-NEXT:    v_readlane_b32 s38, v7, 2
+; CHECK-NEXT:    v_readlane_b32 s39, v7, 3
+; CHECK-NEXT:    v_readlane_b32 s40, v7, 4
+; CHECK-NEXT:    image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1
 ; CHECK-NEXT:    v_mov_b32_e32 v2, 0
-; CHECK-NEXT:    v_readlane_b32 s41, v4, 5
-; CHECK-NEXT:    v_readlane_b32 s42, v4, 6
-; CHECK-NEXT:    v_readlane_b32 s43, v4, 7
+; CHECK-NEXT:    v_readlane_b32 s41, v7, 5
+; CHECK-NEXT:    v_readlane_b32 s42, v7, 6
+; CHECK-NEXT:    v_readlane_b32 s43, v7, 7
 ; CHECK-NEXT:  .LBB0_2: ; %bb50
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    v_readlane_b32 s36, v4, 32
-; CHECK-NEXT:    v_readlane_b32 s40, v4, 36
-; CHECK-NEXT:    v_readlane_b32 s41, v4, 37
-; CHECK-NEXT:    v_readlane_b32 s42, v4, 38
-; CHECK-NEXT:    v_readlane_b32 s43, v4, 39
+; CHECK-NEXT:    v_readlane_b32 s36, v7, 32
+; CHECK-NEXT:    v_readlane_b32 s40, v7, 36
+; CHECK-NEXT:    v_readlane_b32 s41, v7, 37
+; CHECK-NEXT:    v_readlane_b32 s42, v7, 38
+; CHECK-NEXT:    v_readlane_b32 s43, v7, 39
 ; CHECK-NEXT:    s_mov_b32 s21, s20
 ; CHECK-NEXT:    s_mov_b32 s22, s20
 ; CHECK-NEXT:    s_mov_b32 s23, s20
-; CHECK-NEXT:    v_readlane_b32 s37, v4, 33
-; CHECK-NEXT:    v_readlane_b32 s38, v4, 34
+; CHECK-NEXT:    v_readlane_b32 s37, v7, 33
+; CHECK-NEXT:    v_readlane_b32 s38, v7, 34
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    image_sample_lz v6, v[1:2], s[60:67], s[40:43] dmask:0x1
-; CHECK-NEXT:    v_readlane_b32 s39, v4, 35
+; CHECK-NEXT:    image_sample_lz v4, v[1:2], s[60:67], s[40:43] dmask:0x1
+; CHECK-NEXT:    v_readlane_b32 s39, v7, 35
 ; CHECK-NEXT:    image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    v_sub_f32_e32 v1, v1, v6
+; CHECK-NEXT:    v_sub_f32_e32 v1, v1, v4
 ; CHECK-NEXT:    v_mul_f32_e32 v1, v1, v0
-; CHECK-NEXT:    v_mul_f32_e32 v1, v1, v5
+; CHECK-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; CHECK-NEXT:    s_mov_b64 vcc, vcc
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB0_2
 ; CHECK-NEXT:  .LBB0_3: ; %Flow14
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    v_readlane_b32 s12, v4, 32
-; CHECK-NEXT:    v_readlane_b32 s13, v4, 33
-; CHECK-NEXT:    v_readlane_b32 s14, v4, 34
-; CHECK-NEXT:    v_readlane_b32 s15, v4, 35
-; CHECK-NEXT:    v_readlane_b32 s16, v4, 36
-; CHECK-NEXT:    v_readlane_b32 s17, v4, 37
-; CHECK-NEXT:    v_readlane_b32 s18, v4, 38
-; CHECK-NEXT:    v_readlane_b32 s19, v4, 39
-; CHECK-NEXT:    v_writelane_b32 v4, s4, 40
-; CHECK-NEXT:    v_writelane_b32 v4, s5, 41
-; CHECK-NEXT:    v_writelane_b32 v4, s6, 42
-; CHECK-NEXT:    v_writelane_b32 v4, s7, 43
-; CHECK-NEXT:    v_writelane_b32 v4, s8, 44
-; CHECK-NEXT:    v_writelane_b32 v4, s9, 45
-; CHECK-NEXT:    v_writelane_b32 v4, s10, 46
-; CHECK-NEXT:    v_writelane_b32 v4, s11, 47
-; CHECK-NEXT:    v_writelane_b32 v4, s12, 48
-; CHECK-NEXT:    v_writelane_b32 v4, s13, 49
-; CHECK-NEXT:    v_writelane_b32 v4, s14, 50
-; CHECK-NEXT:    v_writelane_b32 v4, s15, 51
-; CHECK-NEXT:    v_writelane_b32 v4, s16, 52
-; CHECK-NEXT:    v_writelane_b32 v4, s17, 53
-; CHECK-NEXT:    v_writelane_b32 v4, s18, 54
-; CHECK-NEXT:    v_writelane_b32 v4, s19, 55
-; CHECK-NEXT:    v_writelane_b32 v4, s52, 56
-; CHECK-NEXT:    v_writelane_b32 v3, s60, 0
-; CHECK-NEXT:    v_writelane_b32 v4, s53, 57
-; CHECK-NEXT:    v_writelane_b32 v3, s61, 1
-; CHECK-NEXT:    v_writelane_b32 v4, s54, 58
-; CHECK-NEXT:    v_writelane_b32 v3, s62, 2
-; CHECK-NEXT:    v_writelane_b32 v4, s55, 59
-; CHECK-NEXT:    v_writelane_b32 v3, s63, 3
-; CHECK-NEXT:    v_writelane_b32 v4, s56, 60
-; CHECK-NEXT:    v_writelane_b32 v3, s64, 4
-; CHECK-NEXT:    v_writelane_b32 v4, s57, 61
-; CHECK-NEXT:    v_writelane_b32 v3, s65, 5
-; CHECK-NEXT:    v_writelane_b32 v4, s58, 62
-; CHECK-NEXT:    v_writelane_b32 v3, s66, 6
-; CHECK-NEXT:    v_writelane_b32 v4, s59, 63
-; CHECK-NEXT:    v_writelane_b32 v3, s67, 7
+; CHECK-NEXT:    v_readlane_b32 s12, v7, 32
+; CHECK-NEXT:    v_readlane_b32 s13, v7, 33
+; CHECK-NEXT:    v_readlane_b32 s14, v7, 34
+; CHECK-NEXT:    v_readlane_b32 s15, v7, 35
+; CHECK-NEXT:    v_readlane_b32 s16, v7, 36
+; CHECK-NEXT:    v_readlane_b32 s17, v7, 37
+; CHECK-NEXT:    v_readlane_b32 s18, v7, 38
+; CHECK-NEXT:    v_readlane_b32 s19, v7, 39
+; CHECK-NEXT:    v_writelane_b32 v7, s4, 40
+; CHECK-NEXT:    v_writelane_b32 v7, s5, 41
+; CHECK-NEXT:    v_writelane_b32 v7, s6, 42
+; CHECK-NEXT:    v_writelane_b32 v7, s7, 43
+; CHECK-NEXT:    v_writelane_b32 v7, s8, 44
+; CHECK-NEXT:    v_writelane_b32 v7, s9, 45
+; CHECK-NEXT:    v_writelane_b32 v7, s10, 46
+; CHECK-NEXT:    v_writelane_b32 v7, s11, 47
+; CHECK-NEXT:    v_writelane_b32 v7, s12, 48
+; CHECK-NEXT:    v_writelane_b32 v7, s13, 49
+; CHECK-NEXT:    v_writelane_b32 v7, s14, 50
+; CHECK-NEXT:    v_writelane_b32 v7, s15, 51
+; CHECK-NEXT:    v_writelane_b32 v7, s16, 52
+; CHECK-NEXT:    v_writelane_b32 v7, s17, 53
+; CHECK-NEXT:    v_writelane_b32 v7, s18, 54
+; CHECK-NEXT:    v_writelane_b32 v7, s19, 55
+; CHECK-NEXT:    ; implicit-def: $vgpr6 : SGPR spill to VGPR lane
+; CHECK-NEXT:    v_writelane_b32 v7, s52, 56
+; CHECK-NEXT:    v_writelane_b32 v6, s60, 0
+; CHECK-NEXT:    v_writelane_b32 v7, s53, 57
+; CHECK-NEXT:    v_writelane_b32 v6, s61, 1
+; CHECK-NEXT:    v_writelane_b32 v7, s54, 58
+; CHECK-NEXT:    v_writelane_b32 v6, s62, 2
+; CHECK-NEXT:    v_writelane_b32 v7, s55, 59
+; CHECK-NEXT:    v_writelane_b32 v6, s63, 3
+; CHECK-NEXT:    v_writelane_b32 v7, s56, 60
+; CHECK-NEXT:    v_writelane_b32 v6, s64, 4
+; CHECK-NEXT:    v_writelane_b32 v7, s57, 61
+; CHECK-NEXT:    v_writelane_b32 v6, s65, 5
+; CHECK-NEXT:    v_writelane_b32 v7, s58, 62
+; CHECK-NEXT:    v_writelane_b32 v6, s66, 6
+; CHECK-NEXT:    v_writelane_b32 v7, s59, 63
+; CHECK-NEXT:    v_writelane_b32 v6, s67, 7
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[20:21], s[26:27]
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_10
 ; CHECK-NEXT:  ; %bb.4: ; %bb32
@@ -219,68 +219,68 @@ define void @main(i1 %arg) #0 {
 ; CHECK-NEXT:    s_mov_b32 s8, 0
 ; CHECK-NEXT:    s_mov_b32 s9, s8
 ; CHECK-NEXT:    v_mov_b32_e32 v0, s8
-; CHECK-NEXT:    v_readlane_b32 s36, v4, 0
+; CHECK-NEXT:    v_readlane_b32 s36, v7, 0
 ; CHECK-NEXT:    v_mov_b32_e32 v1, s9
 ; CHECK-NEXT:    s_mov_b32 s10, s8
 ; CHECK-NEXT:    s_mov_b32 s11, s8
-; CHECK-NEXT:    v_readlane_b32 s37, v4, 1
-; CHECK-NEXT:    v_readlane_b32 s38, v4, 2
-; CHECK-NEXT:    v_readlane_b32 s39, v4, 3
-; CHECK-NEXT:    v_readlane_b32 s40, v4, 4
-; CHECK-NEXT:    v_readlane_b32 s41, v4, 5
-; CHECK-NEXT:    v_readlane_b32 s42, v4, 6
-; CHECK-NEXT:    v_readlane_b32 s43, v4, 7
-; CHECK-NEXT:    v_readlane_b32 s44, v4, 8
-; CHECK-NEXT:    v_readlane_b32 s45, v4, 9
-; CHECK-NEXT:    v_readlane_b32 s46, v4, 10
-; CHECK-NEXT:    v_readlane_b32 s47, v4, 11
-; CHECK-NEXT:    v_readlane_b32 s48, v4, 12
-; CHECK-NEXT:    v_readlane_b32 s49, v4, 13
-; CHECK-NEXT:    v_readlane_b32 s50, v4, 14
-; CHECK-NEXT:    v_readlane_b32 s51, v4, 15
-; CHECK-NEXT:    image_sample_lz v5, v[0:1], s[36:43], s[8:11] dmask:0x1
-; CHECK-NEXT:    v_readlane_b32 s36, v4, 16
-; CHECK-NEXT:    v_readlane_b32 s44, v4, 24
-; CHECK-NEXT:    v_readlane_b32 s45, v4, 25
-; CHECK-NEXT:    v_readlane_b32 s46, v4, 26
-; CHECK-NEXT:    v_readlane_b32 s47, v4, 27
-; CHECK-NEXT:    v_readlane_b32 s48, v4, 28
-; CHECK-NEXT:    v_readlane_b32 s49, v4, 29
-; CHECK-NEXT:    v_readlane_b32 s50, v4, 30
-; CHECK-NEXT:    v_readlane_b32 s51, v4, 31
-; CHECK-NEXT:    v_mov_b32_e32 v6, 0
-; CHECK-NEXT:    v_mov_b32_e32 v7, v6
-; CHECK-NEXT:    v_readlane_b32 s37, v4, 17
-; CHECK-NEXT:    v_readlane_b32 s38, v4, 18
-; CHECK-NEXT:    v_readlane_b32 s39, v4, 19
+; CHECK-NEXT:    v_readlane_b32 s37, v7, 1
+; CHECK-NEXT:    v_readlane_b32 s38, v7, 2
+; CHECK-NEXT:    v_readlane_b32 s39, v7, 3
+; CHECK-NEXT:    v_readlane_b32 s40, v7, 4
+; CHECK-NEXT:    v_readlane_b32 s41, v7, 5
+; CHECK-NEXT:    v_readlane_b32 s42, v7, 6
+; CHECK-NEXT:    v_readlane_b32 s43, v7, 7
+; CHECK-NEXT:    v_readlane_b32 s44, v7, 8
+; CHECK-NEXT:    v_readlane_b32 s45, v7, 9
+; CHECK-NEXT:    v_readlane_b32 s46, v7, 10
+; CHECK-NEXT:    v_readlane_b32 s47, v7, 11
+; CHECK-NEXT:    v_readlane_b32 s48, v7, 12
+; CHECK-NEXT:    v_readlane_b32 s49, v7, 13
+; CHECK-NEXT:    v_readlane_b32 s50, v7, 14
+; CHECK-NEXT:    v_readlane_b32 s51, v7, 15
+; CHECK-NEXT:    image_sample_lz v2, v[0:1], s[36:43], s[8:11] dmask:0x1
+; CHECK-NEXT:    v_readlane_b32 s36, v7, 16
+; CHECK-NEXT:    v_readlane_b32 s44, v7, 24
+; CHECK-NEXT:    v_readlane_b32 s45, v7, 25
+; CHECK-NEXT:    v_readlane_b32 s46, v7, 26
+; CHECK-NEXT:    v_readlane_b32 s47, v7, 27
+; CHECK-NEXT:    v_readlane_b32 s48, v7, 28
+; CHECK-NEXT:    v_readlane_b32 s49, v7, 29
+; CHECK-NEXT:    v_readlane_b32 s50, v7, 30
+; CHECK-NEXT:    v_readlane_b32 s51, v7, 31
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:    v_mov_b32_e32 v4, v3
+; CHECK-NEXT:    v_readlane_b32 s37, v7, 17
+; CHECK-NEXT:    v_readlane_b32 s38, v7, 18
+; CHECK-NEXT:    v_readlane_b32 s39, v7, 19
 ; CHECK-NEXT:    image_sample_lz v0, v[0:1], s[44:51], s[12:15] dmask:0x1
-; CHECK-NEXT:    v_readlane_b32 s40, v4, 20
-; CHECK-NEXT:    v_readlane_b32 s41, v4, 21
-; CHECK-NEXT:    v_readlane_b32 s42, v4, 22
-; CHECK-NEXT:    v_readlane_b32 s43, v4, 23
+; CHECK-NEXT:    v_readlane_b32 s40, v7, 20
+; CHECK-NEXT:    v_readlane_b32 s41, v7, 21
+; CHECK-NEXT:    v_readlane_b32 s42, v7, 22
+; CHECK-NEXT:    v_readlane_b32 s43, v7, 23
 ; CHECK-NEXT:    s_waitcnt vmcnt(1)
-; CHECK-NEXT:    buffer_store_dwordx3 v[5:7], off, s[8:11], 0
+; CHECK-NEXT:    buffer_store_dwordx3 v[2:4], off, s[8:11], 0
 ; CHECK-NEXT:    s_waitcnt vmcnt(1)
 ; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
 ; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  .LBB0_6: ; %Flow12
 ; CHECK-NEXT:    s_or_saveexec_b64 s[4:5], s[22:23]
-; CHECK-NEXT:    v_readlane_b32 s52, v4, 40
-; CHECK-NEXT:    v_readlane_b32 s53, v4, 41
-; CHECK-NEXT:    v_readlane_b32 s54, v4, 42
-; CHECK-NEXT:    v_readlane_b32 s55, v4, 43
-; CHECK-NEXT:    v_readlane_b32 s56, v4, 44
-; CHECK-NEXT:    v_readlane_b32 s57, v4, 45
-; CHECK-NEXT:    v_readlane_b32 s58, v4, 46
-; CHECK-NEXT:    v_readlane_b32 s59, v4, 47
-; CHECK-NEXT:    v_readlane_b32 s60, v4, 48
-; CHECK-NEXT:    v_readlane_b32 s61, v4, 49
-; CHECK-NEXT:    v_readlane_b32 s62, v4, 50
-; CHECK-NEXT:    v_readlane_b32 s63, v4, 51
-; CHECK-NEXT:    v_readlane_b32 s64, v4, 52
-; CHECK-NEXT:    v_readlane_b32 s65, v4, 53
-; CHECK-NEXT:    v_readlane_b32 s66, v4, 54
-; CHECK-NEXT:    v_readlane_b32 s67, v4, 55
+; CHECK-NEXT:    v_readlane_b32 s52, v7, 40
+; CHECK-NEXT:    v_readlane_b32 s53, v7, 41
+; CHECK-NEXT:    v_readlane_b32 s54, v7, 42
+; CHECK-NEXT:    v_readlane_b32 s55, v7, 43
+; CHECK-NEXT:    v_readlane_b32 s56, v7, 44
+; CHECK-NEXT:    v_readlane_b32 s57, v7, 45
+; CHECK-NEXT:    v_readlane_b32 s58, v7, 46
+; CHECK-NEXT:    v_readlane_b32 s59, v7, 47
+; CHECK-NEXT:    v_readlane_b32 s60, v7, 48
+; CHECK-NEXT:    v_readlane_b32 s61, v7, 49
+; CHECK-NEXT:    v_readlane_b32 s62, v7, 50
+; CHECK-NEXT:    v_readlane_b32 s63, v7, 51
+; CHECK-NEXT:    v_readlane_b32 s64, v7, 52
+; CHECK-NEXT:    v_readlane_b32 s65, v7, 53
+; CHECK-NEXT:    v_readlane_b32 s66, v7, 54
+; CHECK-NEXT:    v_readlane_b32 s67, v7, 55
 ; CHECK-NEXT:    s_xor_b64 exec, exec, s[4:5]
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_9
 ; CHECK-NEXT:  ; %bb.7: ; %bb33.preheader
@@ -288,32 +288,32 @@ define void @main(i1 %arg) #0 {
 ; CHECK-NEXT:    s_mov_b32 s6, s8
 ; CHECK-NEXT:    s_mov_b32 s7, s8
 ; CHECK-NEXT:    v_mov_b32_e32 v1, s6
-; CHECK-NEXT:    v_readlane_b32 s36, v4, 56
+; CHECK-NEXT:    v_readlane_b32 s36, v7, 56
 ; CHECK-NEXT:    s_mov_b32 s9, s8
 ; CHECK-NEXT:    s_mov_b32 s10, s8
 ; CHECK-NEXT:    s_mov_b32 s11, s8
 ; CHECK-NEXT:    v_mov_b32_e32 v2, s7
-; CHECK-NEXT:    v_readlane_b32 s37, v4, 57
-; CHECK-NEXT:    v_readlane_b32 s38, v4, 58
-; CHECK-NEXT:    v_readlane_b32 s39, v4, 59
-; CHECK-NEXT:    v_readlane_b32 s40, v4, 60
-; CHECK-NEXT:    v_readlane_b32 s41, v4, 61
-; CHECK-NEXT:    v_readlane_b32 s42, v4, 62
-; CHECK-NEXT:    v_readlane_b32 s43, v4, 63
+; CHECK-NEXT:    v_readlane_b32 s37, v7, 57
+; CHECK-NEXT:    v_readlane_b32 s38, v7, 58
+; CHECK-NEXT:    v_readlane_b32 s39, v7, 59
+; CHECK-NEXT:    v_readlane_b32 s40, v7, 60
+; CHECK-NEXT:    v_readlane_b32 s41, v7, 61
+; CHECK-NEXT:    v_readlane_b32 s42, v7, 62
+; CHECK-NEXT:    v_readlane_b32 s43, v7, 63
 ; CHECK-NEXT:    s_nop 4
-; CHECK-NEXT:    image_sample_lz v5, v[1:2], s[36:43], s[8:11] dmask:0x1
-; CHECK-NEXT:    image_sample_lz v6, v[1:2], s[52:59], s[8:11] dmask:0x1
+; CHECK-NEXT:    image_sample_lz v3, v[1:2], s[36:43], s[8:11] dmask:0x1
+; CHECK-NEXT:    image_sample_lz v4, v[1:2], s[52:59], s[8:11] dmask:0x1
 ; CHECK-NEXT:    ; kill: killed $vgpr1_vgpr2
 ; CHECK-NEXT:    s_mov_b64 s[12:13], s[36:37]
 ; CHECK-NEXT:    s_and_b64 vcc, exec, 0
-; CHECK-NEXT:    v_readlane_b32 s44, v3, 0
-; CHECK-NEXT:    v_readlane_b32 s45, v3, 1
-; CHECK-NEXT:    v_readlane_b32 s46, v3, 2
-; CHECK-NEXT:    v_readlane_b32 s47, v3, 3
-; CHECK-NEXT:    v_readlane_b32 s48, v3, 4
-; CHECK-NEXT:    v_readlane_b32 s49, v3, 5
-; CHECK-NEXT:    v_readlane_b32 s50, v3, 6
-; CHECK-NEXT:    v_readlane_b32 s51, v3, 7
+; CHECK-NEXT:    v_readlane_b32 s44, v6, 0
+; CHECK-NEXT:    v_readlane_b32 s45, v6, 1
+; CHECK-NEXT:    v_readlane_b32 s46, v6, 2
+; CHECK-NEXT:    v_readlane_b32 s47, v6, 3
+; CHECK-NEXT:    v_readlane_b32 s48, v6, 4
+; CHECK-NEXT:    v_readlane_b32 s49, v6, 5
+; CHECK-NEXT:    v_readlane_b32 s50, v6, 6
+; CHECK-NEXT:    v_readlane_b32 s51, v6, 7
 ; CHECK-NEXT:    s_mov_b64 s[14:15], s[38:39]
 ; CHECK-NEXT:    s_mov_b64 s[16:17], s[40:41]
 ; CHECK-NEXT:    s_mov_b64 s[18:19], s[42:43]
@@ -321,7 +321,7 @@ define void @main(i1 %arg) #0 {
 ; CHECK-NEXT:    ; kill: killed $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59
 ; CHECK-NEXT:    ; kill: killed $sgpr8_sgpr9_sgpr10 killed $sgpr11
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    v_sub_f32_e32 v1, v6, v5
+; CHECK-NEXT:    v_sub_f32_e32 v1, v4, v3
 ; CHECK-NEXT:    v_mul_f32_e32 v0, v1, v0
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; CHECK-NEXT:  .LBB0_8: ; %bb33
@@ -334,46 +334,44 @@ define void @main(i1 %arg) #0 {
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CHECK-NEXT:  .LBB0_10: ; %UnifiedReturnBlock
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[20:21]
-; CHECK-NEXT:    v_readlane_b32 s67, v8, 33
-; CHECK-NEXT:    v_readlane_b32 s66, v8, 32
-; CHECK-NEXT:    v_readlane_b32 s65, v8, 31
-; CHECK-NEXT:    v_readlane_b32 s64, v8, 30
-; CHECK-NEXT:    v_readlane_b32 s63, v8, 29
-; CHECK-NEXT:    v_readlane_b32 s62, v8, 28
-; CHECK-NEXT:    v_readlane_b32 s61, v8, 27
-; CHECK-NEXT:    v_readlane_b32 s60, v8, 26
-; CHECK-NEXT:    v_readlane_b32 s59, v8, 25
-; CHECK-NEXT:    v_readlane_b32 s58, v8, 24
-; CHECK-NEXT:    v_readlane_b32 s57, v8, 23
-; CHECK-NEXT:    v_readlane_b32 s56, v8, 22
-; CHECK-NEXT:    v_readlane_b32 s55, v8, 21
-; CHECK-NEXT:    v_readlane_b32 s54, v8, 20
-; CHECK-NEXT:    v_readlane_b32 s53, v8, 19
-; CHECK-NEXT:    v_readlane_b32 s52, v8, 18
-; CHECK-NEXT:    v_readlane_b32 s51, v8, 17
-; CHECK-NEXT:    v_readlane_b32 s50, v8, 16
-; CHECK-NEXT:    v_readlane_b32 s49, v8, 15
-; CHECK-NEXT:    v_readlane_b32 s48, v8, 14
-; CHECK-NEXT:    v_readlane_b32 s47, v8, 13
-; CHECK-NEXT:    v_readlane_b32 s46, v8, 12
-; CHECK-NEXT:    v_readlane_b32 s45, v8, 11
-; CHECK-NEXT:    v_readlane_b32 s44, v8, 10
-; CHECK-NEXT:    v_readlane_b32 s43, v8, 9
-; CHECK-NEXT:    v_readlane_b32 s42, v8, 8
-; CHECK-NEXT:    v_readlane_b32 s41, v8, 7
-; CHECK-NEXT:    v_readlane_b32 s40, v8, 6
-; CHECK-NEXT:    v_readlane_b32 s39, v8, 5
-; CHECK-NEXT:    v_readlane_b32 s38, v8, 4
-; CHECK-NEXT:    v_readlane_b32 s37, v8, 3
-; CHECK-NEXT:    v_readlane_b32 s36, v8, 2
-; CHECK-NEXT:    v_readlane_b32 s31, v8, 1
-; CHECK-NEXT:    v_readlane_b32 s30, v8, 0
-; CHECK-NEXT:    ; kill: killed $vgpr4
-; CHECK-NEXT:    ; kill: killed $vgpr3
+; CHECK-NEXT:    v_readlane_b32 s67, v5, 33
+; CHECK-NEXT:    v_readlane_b32 s66, v5, 32
+; CHECK-NEXT:    v_readlane_b32 s65, v5, 31
+; CHECK-NEXT:    v_readlane_b32 s64, v5, 30
+; CHECK-NEXT:    v_readlane_b32 s63, v5, 29
+; CHECK-NEXT:    v_readlane_b32 s62, v5, 28
+; CHECK-NEXT:    v_readlane_b32 s61, v5, 27
+; CHECK-NEXT:    v_readlane_b32 s60, v5, 26
+; CHECK-NEXT:    v_readlane_b32 s59, v5, 25
+; CHECK-NEXT:    v_readlane_b32 s58, v5, 24
+; CHECK-NEXT:    v_readlane_b32 s57, v5, 23
+; CHECK-NEXT:    v_readlane_b32 s56, v5, 22
+; CHECK-NEXT:    v_readlane_b32 s55, v5, 21
+; CHECK-NEXT:    v_readlane_b32 s54, v5, 20
+; CHECK-NEXT:    v_readlane_b32 s53, v5, 19
+; CHECK-NEXT:    v_readlane_b32 s52, v5, 18
+; CHECK-NEXT:    v_readlane_b32 s51, v5, 17
+; CHECK-NEXT:    v_readlane_b32 s50, v5, 16
+; CHECK-NEXT:    v_readlane_b32 s49, v5, 15
+; CHECK-NEXT:    v_readlane_b32 s48, v5, 14
+; CHECK-NEXT:    v_readlane_b32 s47, v5, 13
+; CHECK-NEXT:    v_readlane_b32 s46, v5, 12
+; CHECK-NEXT:    v_readlane_b32 s45, v5, 11
+; CHECK-NEXT:    v_readlane_b32 s44, v5, 10
+; CHECK-NEXT:    v_readlane_b32 s43, v5, 9
+; CHECK-NEXT:    v_readlane_b32 s42, v5, 8
+; CHECK-NEXT:    v_readlane_b32 s41, v5, 7
+; CHECK-NEXT:    v_readlane_b32 s40, v5, 6
+; CHECK-NEXT:    v_readlane_b32 s39, v5, 5
+; CHECK-NEXT:    v_readlane_b32 s38, v5, 4
+; CHECK-NEXT:    v_readlane_b32 s37, v5, 3
+; CHECK-NEXT:    v_readlane_b32 s36, v5, 2
+; CHECK-NEXT:    v_readlane_b32 s31, v5, 1
+; CHECK-NEXT:    v_readlane_b32 s30, v5, 0
 ; CHECK-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; CHECK-NEXT:    buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b64 exec, s[4:5]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
index 96fb7cfeb2775..40089ed82b5db 100644
--- a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: not llc -mtriple=amdgcn -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,1 -o - 2>%t.err %s | FileCheck %s
+# RUN: not llc -mtriple=amdgcn -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,2 -o - 2>%t.err %s | FileCheck %s
 # RUN: FileCheck -check-prefix=ERR %s < %t.err
 
 # This testcase cannot be compiled. An attempted eviction legality
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
index ec446f1f3bf27..7b195f8e86220 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
@@ -13,22 +13,14 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
 ; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
 ; CHECK-NEXT:    s_add_u32 s0, s0, s15
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    s_mov_b64 s[10:11], s[8:9]
 ; CHECK-NEXT:    v_mov_b32_e32 v3, v2
 ; CHECK-NEXT:    v_mov_b32_e32 v2, v1
 ; CHECK-NEXT:    v_mov_b32_e32 v1, v0
-; CHECK-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; CHECK-NEXT:    s_add_i32 s8, s33, 0x100200
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s8 ; 4-byte Folded Reload
-; CHECK-NEXT:    s_mov_b64 exec, s[34:35]
 ; CHECK-NEXT:    s_load_dword s8, s[6:7], 0x0
-; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_writelane_b32 v0, s8, 0
-; CHECK-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; CHECK-NEXT:    s_add_i32 s8, s33, 0x100200
-; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s8 ; 4-byte Folded Spill
-; CHECK-NEXT:    s_mov_b64 exec, s[34:35]
+; CHECK-NEXT:    ; implicit-def: $vgpr40 : SGPR spill to VGPR lane
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    v_writelane_b32 v40, s8, 0
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def vgpr10
 ; CHECK-NEXT:    ;;#ASMEND
@@ -62,14 +54,9 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
 ; CHECK-NEXT:    s_mov_b64 s[2:3], s[22:23]
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; CHECK-NEXT:    s_add_i32 s4, s33, 0x100200
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
-; CHECK-NEXT:    s_mov_b64 exec, s[34:35]
 ; CHECK-NEXT:    s_add_i32 s4, s33, 0x100100
 ; CHECK-NEXT:    buffer_load_dword v10, off, s[0:3], s4 ; 4-byte Folded Reload
-; CHECK-NEXT:    s_waitcnt vmcnt(1)
-; CHECK-NEXT:    v_readlane_b32 s4, v0, 0
+; CHECK-NEXT:    v_readlane_b32 s4, v40, 0
 ; CHECK-NEXT:    s_mov_b32 s5, 0
 ; CHECK-NEXT:    s_cmp_eq_u32 s4, s5
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0x4000
@@ -77,24 +64,14 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
 ; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], s33 offen ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB0_2
 ; CHECK-NEXT:  ; %bb.1: ; %store
-; CHECK-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; CHECK-NEXT:    s_add_i32 s4, s33, 0x100200
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
-; CHECK-NEXT:    s_mov_b64 exec, s[34:35]
 ; CHECK-NEXT:    s_add_i32 s4, s33, 0x100000
-; CHECK-NEXT:    buffer_load_dword v2, off, s[0:3], s4 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s4 ; 4-byte Folded Reload
 ; CHECK-NEXT:    ; implicit-def: $sgpr4
-; CHECK-NEXT:    v_mov_b32_e32 v1, s4
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    ds_write_b32 v1, v2
-; CHECK-NEXT:    ; kill: killed $vgpr0
+; CHECK-NEXT:    ds_write_b32 v0, v1
 ; CHECK-NEXT:    s_endpgm
 ; CHECK-NEXT:  .LBB0_2: ; %end
-; CHECK-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; CHECK-NEXT:    s_add_i32 s4, s33, 0x100200
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
-; CHECK-NEXT:    s_mov_b64 exec, s[34:35]
-; CHECK-NEXT:    ; kill: killed $vgpr0
 ; CHECK-NEXT:    s_endpgm
   %arr = alloca < 1339 x i32>, align 8192, addrspace(5)
   %cmp = icmp ne i32 %val, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 0ff5dd3680dfa..57b9f8f1a9d6b 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -121,6 +121,8 @@
 ; GCN-O0-NEXT:        SI Pre-allocate WWM Registers
 ; GCN-O0-NEXT:        Fast Register Allocator
 ; GCN-O0-NEXT:        SI Lower WWM Copies
+; GCN-O0-NEXT:        AMDGPU Reserve WWM Registers
+; GCN-O0-NEXT:        Fast Register Allocator
 ; GCN-O0-NEXT:        SI Fix VGPR copies
 ; GCN-O0-NEXT:        Remove Redundant DEBUG_VALUE analysis
 ; GCN-O0-NEXT:        Fixup Statepoint Caller Saved
@@ -368,6 +370,11 @@
 ; GCN-O1-NEXT:        SI Pre-allocate WWM Registers
 ; GCN-O1-NEXT:        Greedy Register Allocator
 ; GCN-O1-NEXT:        SI Lower WWM Copies
+; GCN-O1-NEXT:        Virtual Register Rewriter
+; GCN-O1-NEXT:        AMDGPU Reserve WWM Registers
+; GCN-O1-NEXT:        Virtual Register Map
+; GCN-O1-NEXT:        Live Register Matrix
+; GCN-O1-NEXT:        Greedy Register Allocator
 ; GCN-O1-NEXT:        GCN NSA Reassign
 ; GCN-O1-NEXT:        Virtual Register Rewriter
 ; GCN-O1-NEXT:        AMDGPU Mark Last Scratch Load
@@ -671,6 +678,11 @@
 ; GCN-O1-OPTS-NEXT:        SI Pre-allocate WWM Registers
 ; GCN-O1-OPTS-NEXT:        Greedy Register Allocator
 ; GCN-O1-OPTS-NEXT:        SI Lower WWM Copies
+; GCN-O1-OPTS-NEXT:        Virtual Register Rewriter
+; GCN-O1-OPTS-NEXT:        AMDGPU Reserve WWM Registers
+; GCN-O1-OPTS-NEXT:        Virtual Register Map
+; GCN-O1-OPTS-NEXT:        Live Register Matrix
+; GCN-O1-OPTS-NEXT:        Greedy Register Allocator
 ; GCN-O1-OPTS-NEXT:        GCN NSA Reassign
 ; GCN-O1-OPTS-NEXT:        Virtual Register Rewriter
 ; GCN-O1-OPTS-NEXT:        AMDGPU Mark Last Scratch Load
@@ -980,6 +992,11 @@
 ; GCN-O2-NEXT:        SI Pre-allocate WWM Registers
 ; GCN-O2-NEXT:        Greedy Register Allocator
 ; GCN-O2-NEXT:        SI Lower WWM Copies
+; GCN-O2-NEXT:        Virtual Register Rewriter
+; GCN-O2-NEXT:        AMDGPU Reserve WWM Registers
+; GCN-O2-NEXT:        Virtual Register Map
+; GCN-O2-NEXT:        Live Register Matrix
+; GCN-O2-NEXT:        Greedy Register Allocator
 ; GCN-O2-NEXT:        GCN NSA Reassign
 ; GCN-O2-NEXT:        Virtual Register Rewriter
 ; GCN-O2-NEXT:        AMDGPU Mark Last Scratch Load
@@ -1301,6 +1318,11 @@
 ; GCN-O3-NEXT:        SI Pre-allocate WWM Registers
 ; GCN-O3-NEXT:        Greedy Register Allocator
 ; GCN-O3-NEXT:        SI Lower WWM Copies
+; GCN-O3-NEXT:        Virtual Register Rewriter
+; GCN-O3-NEXT:        AMDGPU Reserve WWM Registers
+; GCN-O3-NEXT:        Virtual Register Map
+; GCN-O3-NEXT:        Live Register Matrix
+; GCN-O3-NEXT:        Greedy Register Allocator
 ; GCN-O3-NEXT:        GCN NSA Reassign
 ; GCN-O3-NEXT:        Virtual Register Rewriter
 ; GCN-O3-NEXT:        AMDGPU Mark Last Scratch Load
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
index b5ee6689f8dc3..76624702e9fec 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
@@ -141,86 +141,81 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
 ; W64-O0:       ; %bb.0:
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
 ; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; W64-O0-NEXT:    v_mov_b32_e32 v5, v3
-; W64-O0-NEXT:    v_mov_b32_e32 v6, v2
-; W64-O0-NEXT:    v_mov_b32_e32 v7, v1
-; W64-O0-NEXT:    v_mov_b32_e32 v1, v0
-; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT:    v_mov_b32_e32 v4, v3
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v2
+; W64-O0-NEXT:    v_mov_b32_e32 v6, v1
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v2, v7
-; W64-O0-NEXT:    v_mov_b32_e32 v3, v6
-; W64-O0-NEXT:    v_mov_b32_e32 v4, v5
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v1, v6
+; W64-O0-NEXT:    v_mov_b32_e32 v2, v5
+; W64-O0-NEXT:    v_mov_b32_e32 v3, v4
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
 ; W64-O0-NEXT:    s_mov_b32 s4, 0
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 0
+; W64-O0-NEXT:    ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
+; W64-O0-NEXT:    v_writelane_b32 v7, s4, 0
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 1
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 2
+; W64-O0-NEXT:    v_writelane_b32 v7, s4, 1
+; W64-O0-NEXT:    v_writelane_b32 v7, s5, 2
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  .LBB0_1: ; =>This Inner Loop Header: Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT:    v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s8, v0
+; W64-O0-NEXT:    v_readfirstlane_b32 s12, v1
 ; W64-O0-NEXT:    s_mov_b32 s4, s8
 ; W64-O0-NEXT:    s_mov_b32 s5, s12
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
-; W64-O0-NEXT:    v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT:    v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT:    v_readfirstlane_b32 s7, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s6, v3
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[10:11]
 ; W64-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
 ; W64-O0-NEXT:    s_mov_b32 s9, s12
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_writelane_b32 v0, s8, 3
-; W64-O0-NEXT:    v_writelane_b32 v0, s9, 4
-; W64-O0-NEXT:    v_writelane_b32 v0, s10, 5
-; W64-O0-NEXT:    v_writelane_b32 v0, s11, 6
+; W64-O0-NEXT:    v_writelane_b32 v7, s8, 3
+; W64-O0-NEXT:    v_writelane_b32 v7, s9, 4
+; W64-O0-NEXT:    v_writelane_b32 v7, s10, 5
+; W64-O0-NEXT:    v_writelane_b32 v7, s11, 6
 ; W64-O0-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 7
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 8
+; W64-O0-NEXT:    v_writelane_b32 v7, s4, 7
+; W64-O0-NEXT:    v_writelane_b32 v7, s5, 8
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 7
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 8
-; W64-O0-NEXT:    v_readlane_b32 s8, v1, 3
-; W64-O0-NEXT:    v_readlane_b32 s9, v1, 4
-; W64-O0-NEXT:    v_readlane_b32 s10, v1, 5
-; W64-O0-NEXT:    v_readlane_b32 s11, v1, 6
-; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
+; W64-O0-NEXT:    v_readlane_b32 s4, v7, 7
+; W64-O0-NEXT:    v_readlane_b32 s5, v7, 8
+; W64-O0-NEXT:    v_readlane_b32 s8, v7, 3
+; W64-O0-NEXT:    v_readlane_b32 s9, v7, 4
+; W64-O0-NEXT:    v_readlane_b32 s10, v7, 5
+; W64-O0-NEXT:    v_readlane_b32 s11, v7, 6
+; W64-O0-NEXT:    v_readlane_b32 s6, v7, 0
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_nop 2
@@ -231,19 +226,15 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB0_1
 ; W64-O0-NEXT:  ; %bb.3:
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 1
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 2
+; W64-O0-NEXT:    v_readlane_b32 s4, v7, 1
+; W64-O0-NEXT:    v_readlane_b32 s5, v7, 2
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT:    ; kill: killed $vgpr1
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_nop 0
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -495,34 +486,32 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
 ; W64-O0:       ; %bb.0: ; %entry
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
 ; W64-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    v_mov_b32_e32 v13, v4
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; W64-O0-NEXT:    v_mov_b32_e32 v7, v3
-; W64-O0-NEXT:    v_mov_b32_e32 v8, v2
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; W64-O0-NEXT:    v_mov_b32_e32 v9, v1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; W64-O0-NEXT:    v_mov_b32_e32 v3, v0
-; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_mov_b32_e32 v6, v3
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_mov_b32_e32 v7, v2
+; W64-O0-NEXT:    v_mov_b32_e32 v8, v1
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_mov_b32_e32 v2, v0
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14_vgpr15_vgpr16 killed $exec
 ; W64-O0-NEXT:    v_mov_b32_e32 v14, v5
-; W64-O0-NEXT:    v_mov_b32_e32 v15, v6
 ; W64-O0-NEXT:    s_waitcnt vmcnt(3)
-; W64-O0-NEXT:    v_mov_b32_e32 v16, v4
+; W64-O0-NEXT:    v_mov_b32_e32 v15, v4
+; W64-O0-NEXT:    s_waitcnt vmcnt(2)
+; W64-O0-NEXT:    v_mov_b32_e32 v16, v3
 ; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
@@ -532,186 +521,185 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4_vgpr5_vgpr6 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v4, v9
-; W64-O0-NEXT:    v_mov_b32_e32 v5, v8
-; W64-O0-NEXT:    v_mov_b32_e32 v6, v7
-; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v3, v8
+; W64-O0-NEXT:    v_mov_b32_e32 v4, v7
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v6
+; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v3, v12
-; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v2, v12
+; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v2, v10
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v1, v10
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
 ; W64-O0-NEXT:    s_mov_b32 s4, 0
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 0
+; W64-O0-NEXT:    ; implicit-def: $vgpr17 : SGPR spill to VGPR lane
+; W64-O0-NEXT:    v_writelane_b32 v17, s4, 0
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 1
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 2
+; W64-O0-NEXT:    v_writelane_b32 v17, s4, 1
+; W64-O0-NEXT:    v_writelane_b32 v17, s5, 2
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT:    v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s8, v0
+; W64-O0-NEXT:    v_readfirstlane_b32 s12, v1
 ; W64-O0-NEXT:    s_mov_b32 s4, s8
 ; W64-O0-NEXT:    s_mov_b32 s5, s12
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
-; W64-O0-NEXT:    v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT:    v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT:    v_readfirstlane_b32 s7, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s6, v3
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[10:11]
 ; W64-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
 ; W64-O0-NEXT:    s_mov_b32 s9, s12
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_writelane_b32 v0, s8, 3
-; W64-O0-NEXT:    v_writelane_b32 v0, s9, 4
-; W64-O0-NEXT:    v_writelane_b32 v0, s10, 5
-; W64-O0-NEXT:    v_writelane_b32 v0, s11, 6
+; W64-O0-NEXT:    v_writelane_b32 v17, s8, 3
+; W64-O0-NEXT:    v_writelane_b32 v17, s9, 4
+; W64-O0-NEXT:    v_writelane_b32 v17, s10, 5
+; W64-O0-NEXT:    v_writelane_b32 v17, s11, 6
 ; W64-O0-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 7
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 8
+; W64-O0-NEXT:    v_writelane_b32 v17, s4, 7
+; W64-O0-NEXT:    v_writelane_b32 v17, s5, 8
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 7
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 8
-; W64-O0-NEXT:    v_readlane_b32 s8, v1, 3
-; W64-O0-NEXT:    v_readlane_b32 s9, v1, 4
-; W64-O0-NEXT:    v_readlane_b32 s10, v1, 5
-; W64-O0-NEXT:    v_readlane_b32 s11, v1, 6
-; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_readlane_b32 s4, v17, 7
+; W64-O0-NEXT:    v_readlane_b32 s5, v17, 8
+; W64-O0-NEXT:    v_readlane_b32 s8, v17, 3
+; W64-O0-NEXT:    v_readlane_b32 s9, v17, 4
+; W64-O0-NEXT:    v_readlane_b32 s10, v17, 5
+; W64-O0-NEXT:    v_readlane_b32 s11, v17, 6
+; W64-O0-NEXT:    v_readlane_b32 s6, v17, 0
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_xor_b64 exec, exec, s[4:5]
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB1_1
 ; W64-O0-NEXT:  ; %bb.3:
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 1
-; W64-O0-NEXT:    v_readlane_b32 s5, v0, 2
+; W64-O0-NEXT:    v_readlane_b32 s4, v17, 1
+; W64-O0-NEXT:    v_readlane_b32 s5, v17, 2
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 9
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 10
+; W64-O0-NEXT:    v_writelane_b32 v17, s4, 9
+; W64-O0-NEXT:    v_writelane_b32 v17, s5, 10
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  .LBB1_4: ; =>This Inner Loop Header: Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT:    v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s8, v0
+; W64-O0-NEXT:    v_readfirstlane_b32 s12, v1
 ; W64-O0-NEXT:    s_mov_b32 s4, s8
 ; W64-O0-NEXT:    s_mov_b32 s5, s12
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
-; W64-O0-NEXT:    v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT:    v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT:    v_readfirstlane_b32 s7, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s6, v3
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[10:11]
 ; W64-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
 ; W64-O0-NEXT:    s_mov_b32 s9, s12
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_writelane_b32 v0, s8, 11
-; W64-O0-NEXT:    v_writelane_b32 v0, s9, 12
-; W64-O0-NEXT:    v_writelane_b32 v0, s10, 13
-; W64-O0-NEXT:    v_writelane_b32 v0, s11, 14
+; W64-O0-NEXT:    v_writelane_b32 v17, s8, 11
+; W64-O0-NEXT:    v_writelane_b32 v17, s9, 12
+; W64-O0-NEXT:    v_writelane_b32 v17, s10, 13
+; W64-O0-NEXT:    v_writelane_b32 v17, s11, 14
 ; W64-O0-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 15
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 16
+; W64-O0-NEXT:    v_writelane_b32 v17, s4, 15
+; W64-O0-NEXT:    v_writelane_b32 v17, s5, 16
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.5: ; in Loop: Header=BB1_4 Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 15
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 16
-; W64-O0-NEXT:    v_readlane_b32 s8, v1, 11
-; W64-O0-NEXT:    v_readlane_b32 s9, v1, 12
-; W64-O0-NEXT:    v_readlane_b32 s10, v1, 13
-; W64-O0-NEXT:    v_readlane_b32 s11, v1, 14
-; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_readlane_b32 s4, v17, 15
+; W64-O0-NEXT:    v_readlane_b32 s5, v17, 16
+; W64-O0-NEXT:    v_readlane_b32 s8, v17, 11
+; W64-O0-NEXT:    v_readlane_b32 s9, v17, 12
+; W64-O0-NEXT:    v_readlane_b32 s10, v17, 13
+; W64-O0-NEXT:    v_readlane_b32 s11, v17, 14
+; W64-O0-NEXT:    v_readlane_b32 s6, v17, 0
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_xor_b64 exec, exec, s[4:5]
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB1_4
 ; W64-O0-NEXT:  ; %bb.6:
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 9
-; W64-O0-NEXT:    v_readlane_b32 s5, v0, 10
+; W64-O0-NEXT:    v_readlane_b32 s4, v17, 9
+; W64-O0-NEXT:    v_readlane_b32 s5, v17, 10
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    global_store_dword v[4:5], v6, off
+; W64-O0-NEXT:    global_store_dword v[3:4], v5, off
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    global_store_dword v[1:2], v3, off
+; W64-O0-NEXT:    global_store_dword v[0:1], v2, off
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    ; kill: killed $vgpr0
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -1019,120 +1007,116 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
 ; W64-O0:       ; %bb.0: ; %entry
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
 ; W64-O0-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; W64-O0-NEXT:    v_mov_b32_e32 v8, v6
-; W64-O0-NEXT:    v_mov_b32_e32 v9, v5
-; W64-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_mov_b32_e32 v8, v5
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v4
+; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_nop 0
-; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; W64-O0-NEXT:    v_mov_b32_e32 v10, v3
-; W64-O0-NEXT:    v_mov_b32_e32 v11, v2
-; W64-O0-NEXT:    v_mov_b32_e32 v13, v1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; W64-O0-NEXT:    v_mov_b32_e32 v6, v0
-; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; W64-O0-NEXT:    v_mov_b32_e32 v9, v3
+; W64-O0-NEXT:    v_mov_b32_e32 v10, v2
+; W64-O0-NEXT:    v_mov_b32_e32 v11, v1
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v0
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v2, v9
-; W64-O0-NEXT:    v_mov_b32_e32 v3, v8
-; W64-O0-NEXT:    v_mov_b32_e32 v4, v7
+; W64-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v1, v8
+; W64-O0-NEXT:    v_mov_b32_e32 v2, v6
+; W64-O0-NEXT:    v_mov_b32_e32 v3, v7
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7_vgpr8_vgpr9 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v7, v13
-; W64-O0-NEXT:    v_mov_b32_e32 v8, v11
-; W64-O0-NEXT:    v_mov_b32_e32 v9, v10
-; W64-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v6, v11
+; W64-O0-NEXT:    v_mov_b32_e32 v7, v10
+; W64-O0-NEXT:    v_mov_b32_e32 v8, v9
+; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v6, v12
-; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v12
+; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
 ; W64-O0-NEXT:    ;;#ASMSTART
 ; W64-O0-NEXT:    s_mov_b32 s4, 17
 ; W64-O0-NEXT:    ;;#ASMEND
 ; W64-O0-NEXT:    s_mov_b32 s5, s4
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 0
+; W64-O0-NEXT:    ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 0
 ; W64-O0-NEXT:    s_mov_b32 s5, 0
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 1
-; W64-O0-NEXT:    v_mov_b32_e32 v1, s4
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 1
+; W64-O0-NEXT:    v_mov_b32_e32 v0, s4
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 2
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 3
+; W64-O0-NEXT:    v_writelane_b32 v13, s4, 2
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 3
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT:    v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s8, v0
+; W64-O0-NEXT:    v_readfirstlane_b32 s12, v1
 ; W64-O0-NEXT:    s_mov_b32 s4, s8
 ; W64-O0-NEXT:    s_mov_b32 s5, s12
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
-; W64-O0-NEXT:    v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT:    v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT:    v_readfirstlane_b32 s7, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s6, v3
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[10:11]
 ; W64-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
 ; W64-O0-NEXT:    s_mov_b32 s9, s12
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_writelane_b32 v0, s8, 4
-; W64-O0-NEXT:    v_writelane_b32 v0, s9, 5
-; W64-O0-NEXT:    v_writelane_b32 v0, s10, 6
-; W64-O0-NEXT:    v_writelane_b32 v0, s11, 7
+; W64-O0-NEXT:    v_writelane_b32 v13, s8, 4
+; W64-O0-NEXT:    v_writelane_b32 v13, s9, 5
+; W64-O0-NEXT:    v_writelane_b32 v13, s10, 6
+; W64-O0-NEXT:    v_writelane_b32 v13, s11, 7
 ; W64-O0-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 8
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 9
+; W64-O0-NEXT:    v_writelane_b32 v13, s4, 8
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 9
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB2_1 Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 8
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 9
-; W64-O0-NEXT:    v_readlane_b32 s8, v1, 4
-; W64-O0-NEXT:    v_readlane_b32 s9, v1, 5
-; W64-O0-NEXT:    v_readlane_b32 s10, v1, 6
-; W64-O0-NEXT:    v_readlane_b32 s11, v1, 7
-; W64-O0-NEXT:    v_readlane_b32 s6, v1, 1
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 8
+; W64-O0-NEXT:    v_readlane_b32 s5, v13, 9
+; W64-O0-NEXT:    v_readlane_b32 s8, v13, 4
+; W64-O0-NEXT:    v_readlane_b32 s9, v13, 5
+; W64-O0-NEXT:    v_readlane_b32 s10, v13, 6
+; W64-O0-NEXT:    v_readlane_b32 s11, v13, 7
+; W64-O0-NEXT:    v_readlane_b32 s6, v13, 1
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_nop 2
@@ -1143,92 +1127,92 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB2_1
 ; W64-O0-NEXT:  ; %bb.3:
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s6, v0, 2
-; W64-O0-NEXT:    v_readlane_b32 s7, v0, 3
+; W64-O0-NEXT:    v_readlane_b32 s6, v13, 2
+; W64-O0-NEXT:    v_readlane_b32 s7, v13, 3
 ; W64-O0-NEXT:    s_mov_b64 exec, s[6:7]
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 1
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b32 s5, 0x3ff
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_and_b32_e64 v2, v2, s5
-; W64-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v2, s4
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; W64-O0-NEXT:    v_and_b32_e64 v1, v1, s5
+; W64-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v1, s4
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 10
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 11
+; W64-O0-NEXT:    v_writelane_b32 v13, s4, 10
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 11
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_cbranch_execz .LBB2_8
 ; W64-O0-NEXT:  ; %bb.4: ; %bb1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 0
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 0
 ; W64-O0-NEXT:    s_mov_b32 s5, 0
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 12
-; W64-O0-NEXT:    v_mov_b32_e32 v1, s4
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 12
+; W64-O0-NEXT:    v_mov_b32_e32 v0, s4
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 13
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 14
+; W64-O0-NEXT:    v_writelane_b32 v13, s4, 13
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 14
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  .LBB2_5: ; =>This Inner Loop Header: Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT:    v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s8, v0
+; W64-O0-NEXT:    v_readfirstlane_b32 s12, v1
 ; W64-O0-NEXT:    s_mov_b32 s4, s8
 ; W64-O0-NEXT:    s_mov_b32 s5, s12
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
-; W64-O0-NEXT:    v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT:    v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT:    v_readfirstlane_b32 s7, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s6, v3
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[10:11]
 ; W64-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
 ; W64-O0-NEXT:    s_mov_b32 s9, s12
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_writelane_b32 v0, s8, 15
-; W64-O0-NEXT:    v_writelane_b32 v0, s9, 16
-; W64-O0-NEXT:    v_writelane_b32 v0, s10, 17
-; W64-O0-NEXT:    v_writelane_b32 v0, s11, 18
+; W64-O0-NEXT:    v_writelane_b32 v13, s8, 15
+; W64-O0-NEXT:    v_writelane_b32 v13, s9, 16
+; W64-O0-NEXT:    v_writelane_b32 v13, s10, 17
+; W64-O0-NEXT:    v_writelane_b32 v13, s11, 18
 ; W64-O0-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 19
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 20
+; W64-O0-NEXT:    v_writelane_b32 v13, s4, 19
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 20
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.6: ; in Loop: Header=BB2_5 Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 19
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 20
-; W64-O0-NEXT:    v_readlane_b32 s8, v1, 15
-; W64-O0-NEXT:    v_readlane_b32 s9, v1, 16
-; W64-O0-NEXT:    v_readlane_b32 s10, v1, 17
-; W64-O0-NEXT:    v_readlane_b32 s11, v1, 18
-; W64-O0-NEXT:    v_readlane_b32 s6, v1, 12
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 19
+; W64-O0-NEXT:    v_readlane_b32 s5, v13, 20
+; W64-O0-NEXT:    v_readlane_b32 s8, v13, 15
+; W64-O0-NEXT:    v_readlane_b32 s9, v13, 16
+; W64-O0-NEXT:    v_readlane_b32 s10, v13, 17
+; W64-O0-NEXT:    v_readlane_b32 s11, v13, 18
+; W64-O0-NEXT:    v_readlane_b32 s6, v13, 12
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_nop 2
@@ -1239,33 +1223,31 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB2_5
 ; W64-O0-NEXT:  ; %bb.7:
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 13
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 14
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 13
+; W64-O0-NEXT:    v_readlane_b32 s5, v13, 14
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; W64-O0-NEXT:  .LBB2_8: ; %bb2
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 10
-; W64-O0-NEXT:    v_readlane_b32 s5, v0, 11
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 10
+; W64-O0-NEXT:    v_readlane_b32 s5, v13, 11
 ; W64-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    global_store_dword v[1:2], v3, off
+; W64-O0-NEXT:    global_store_dword v[0:1], v2, off
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    ; kill: killed $vgpr0
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index 162c47f879465..30a4662babeca 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -140,124 +140,112 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 {
 ; W64-O0:       ; %bb.0:
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
-; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; W64-O0-NEXT:    v_mov_b32_e32 v6, v2
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; W64-O0-NEXT:    v_mov_b32_e32 v3, v1
-; W64-O0-NEXT:    v_mov_b32_e32 v1, v0
-; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v2
+; W64-O0-NEXT:    v_mov_b32_e32 v2, v1
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; W64-O0-NEXT:    s_waitcnt vmcnt(1)
-; W64-O0-NEXT:    v_mov_b32_e32 v7, v2
-; W64-O0-NEXT:    v_mov_b32_e32 v5, v7
-; W64-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; W64-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v6, v3
+; W64-O0-NEXT:    v_mov_b32_e32 v4, v6
+; W64-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v2, v3
-; W64-O0-NEXT:    v_mov_b32_e32 v7, v2
-; W64-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec
+; W64-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v1, v2
+; W64-O0-NEXT:    v_mov_b32_e32 v6, v1
+; W64-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v2, v7
-; W64-O0-NEXT:    v_mov_b32_e32 v3, v6
-; W64-O0-NEXT:    v_mov_b32_e32 v4, v5
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v1, v6
+; W64-O0-NEXT:    v_mov_b32_e32 v2, v5
+; W64-O0-NEXT:    v_mov_b32_e32 v3, v4
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; W64-O0-NEXT:    s_mov_b32 s4, 0
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 0
+; W64-O0-NEXT:    ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
+; W64-O0-NEXT:    v_writelane_b32 v7, s4, 0
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 1
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 2
+; W64-O0-NEXT:    v_writelane_b32 v7, s4, 1
+; W64-O0-NEXT:    v_writelane_b32 v7, s5, 2
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  .LBB0_1: ; =>This Inner Loop Header: Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT:    v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s8, v0
+; W64-O0-NEXT:    v_readfirstlane_b32 s12, v1
 ; W64-O0-NEXT:    s_mov_b32 s4, s8
 ; W64-O0-NEXT:    s_mov_b32 s5, s12
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
-; W64-O0-NEXT:    v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT:    v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT:    v_readfirstlane_b32 s7, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s6, v3
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[10:11]
 ; W64-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
 ; W64-O0-NEXT:    s_mov_b32 s9, s12
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_writelane_b32 v0, s8, 3
-; W64-O0-NEXT:    v_writelane_b32 v0, s9, 4
-; W64-O0-NEXT:    v_writelane_b32 v0, s10, 5
-; W64-O0-NEXT:    v_writelane_b32 v0, s11, 6
+; W64-O0-NEXT:    v_writelane_b32 v7, s8, 3
+; W64-O0-NEXT:    v_writelane_b32 v7, s9, 4
+; W64-O0-NEXT:    v_writelane_b32 v7, s10, 5
+; W64-O0-NEXT:    v_writelane_b32 v7, s11, 6
 ; W64-O0-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 7
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 8
+; W64-O0-NEXT:    v_writelane_b32 v7, s4, 7
+; W64-O0-NEXT:    v_writelane_b32 v7, s5, 8
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 7
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 8
-; W64-O0-NEXT:    v_readlane_b32 s8, v1, 3
-; W64-O0-NEXT:    v_readlane_b32 s9, v1, 4
-; W64-O0-NEXT:    v_readlane_b32 s10, v1, 5
-; W64-O0-NEXT:    v_readlane_b32 s11, v1, 6
-; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_readlane_b32 s4, v7, 7
+; W64-O0-NEXT:    v_readlane_b32 s5, v7, 8
+; W64-O0-NEXT:    v_readlane_b32 s8, v7, 3
+; W64-O0-NEXT:    v_readlane_b32 s9, v7, 4
+; W64-O0-NEXT:    v_readlane_b32 s10, v7, 5
+; W64-O0-NEXT:    v_readlane_b32 s11, v7, 6
+; W64-O0-NEXT:    v_readlane_b32 s6, v7, 0
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_xor_b64 exec, exec, s[4:5]
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB0_1
 ; W64-O0-NEXT:  ; %bb.3:
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 1
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 2
+; W64-O0-NEXT:    v_readlane_b32 s4, v7, 1
+; W64-O0-NEXT:    v_readlane_b32 s5, v7, 2
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT:    ; kill: killed $vgpr1
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_nop 0
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -509,45 +497,42 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
 ; W64-O0:       ; %bb.0: ; %entry
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
 ; W64-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    v_mov_b32_e32 v14, v6
-; W64-O0-NEXT:    v_mov_b32_e32 v9, v5
+; W64-O0-NEXT:    v_mov_b32_e32 v8, v5
+; W64-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    v_mov_b32_e32 v13, v4
-; W64-O0-NEXT:    v_mov_b32_e32 v4, v3
-; W64-O0-NEXT:    v_mov_b32_e32 v8, v2
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; W64-O0-NEXT:    v_mov_b32_e32 v5, v1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; W64-O0-NEXT:    v_mov_b32_e32 v3, v0
-; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT:    v_mov_b32_e32 v7, v2
+; W64-O0-NEXT:    v_mov_b32_e32 v4, v1
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_mov_b32_e32 v2, v0
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v15, v7
-; W64-O0-NEXT:    v_mov_b32_e32 v6, v15
-; W64-O0-NEXT:    v_mov_b32_e32 v7, v14
+; W64-O0-NEXT:    s_waitcnt vmcnt(2)
+; W64-O0-NEXT:    v_mov_b32_e32 v15, v5
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v15
+; W64-O0-NEXT:    v_mov_b32_e32 v6, v14
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v14, v9
-; W64-O0-NEXT:    v_mov_b32_e32 v9, v14
+; W64-O0-NEXT:    v_mov_b32_e32 v14, v8
+; W64-O0-NEXT:    v_mov_b32_e32 v8, v14
 ; W64-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 killed $vgpr13_vgpr14 killed $exec
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14_vgpr15_vgpr16 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v14, v9
-; W64-O0-NEXT:    v_mov_b32_e32 v15, v7
-; W64-O0-NEXT:    v_mov_b32_e32 v16, v6
+; W64-O0-NEXT:    v_mov_b32_e32 v14, v8
+; W64-O0-NEXT:    v_mov_b32_e32 v15, v6
+; W64-O0-NEXT:    v_mov_b32_e32 v16, v5
 ; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
@@ -555,43 +540,43 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
 ; W64-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v9, v4
-; W64-O0-NEXT:    v_mov_b32_e32 v7, v9
-; W64-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec
+; W64-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v8, v3
+; W64-O0-NEXT:    v_mov_b32_e32 v6, v8
+; W64-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v4, v5
-; W64-O0-NEXT:    v_mov_b32_e32 v9, v4
-; W64-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
+; W64-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v3, v4
+; W64-O0-NEXT:    v_mov_b32_e32 v8, v3
+; W64-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4_vgpr5_vgpr6 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v4, v9
-; W64-O0-NEXT:    v_mov_b32_e32 v5, v8
-; W64-O0-NEXT:    v_mov_b32_e32 v6, v7
-; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v3, v8
+; W64-O0-NEXT:    v_mov_b32_e32 v4, v7
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v6
+; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v3, v12
-; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v2, v12
+; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v2, v10
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v1, v10
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
@@ -599,158 +584,157 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; W64-O0-NEXT:    s_mov_b32 s4, 0
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 0
+; W64-O0-NEXT:    ; implicit-def: $vgpr17 : SGPR spill to VGPR lane
+; W64-O0-NEXT:    v_writelane_b32 v17, s4, 0
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 1
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 2
+; W64-O0-NEXT:    v_writelane_b32 v17, s4, 1
+; W64-O0-NEXT:    v_writelane_b32 v17, s5, 2
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT:    v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s8, v0
+; W64-O0-NEXT:    v_readfirstlane_b32 s12, v1
 ; W64-O0-NEXT:    s_mov_b32 s4, s8
 ; W64-O0-NEXT:    s_mov_b32 s5, s12
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
-; W64-O0-NEXT:    v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT:    v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT:    v_readfirstlane_b32 s7, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s6, v3
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[10:11]
 ; W64-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
 ; W64-O0-NEXT:    s_mov_b32 s9, s12
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_writelane_b32 v0, s8, 3
-; W64-O0-NEXT:    v_writelane_b32 v0, s9, 4
-; W64-O0-NEXT:    v_writelane_b32 v0, s10, 5
-; W64-O0-NEXT:    v_writelane_b32 v0, s11, 6
+; W64-O0-NEXT:    v_writelane_b32 v17, s8, 3
+; W64-O0-NEXT:    v_writelane_b32 v17, s9, 4
+; W64-O0-NEXT:    v_writelane_b32 v17, s10, 5
+; W64-O0-NEXT:    v_writelane_b32 v17, s11, 6
 ; W64-O0-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 7
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 8
+; W64-O0-NEXT:    v_writelane_b32 v17, s4, 7
+; W64-O0-NEXT:    v_writelane_b32 v17, s5, 8
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 7
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 8
-; W64-O0-NEXT:    v_readlane_b32 s8, v1, 3
-; W64-O0-NEXT:    v_readlane_b32 s9, v1, 4
-; W64-O0-NEXT:    v_readlane_b32 s10, v1, 5
-; W64-O0-NEXT:    v_readlane_b32 s11, v1, 6
-; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_readlane_b32 s4, v17, 7
+; W64-O0-NEXT:    v_readlane_b32 s5, v17, 8
+; W64-O0-NEXT:    v_readlane_b32 s8, v17, 3
+; W64-O0-NEXT:    v_readlane_b32 s9, v17, 4
+; W64-O0-NEXT:    v_readlane_b32 s10, v17, 5
+; W64-O0-NEXT:    v_readlane_b32 s11, v17, 6
+; W64-O0-NEXT:    v_readlane_b32 s6, v17, 0
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_xor_b64 exec, exec, s[4:5]
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB1_1
 ; W64-O0-NEXT:  ; %bb.3:
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 1
-; W64-O0-NEXT:    v_readlane_b32 s5, v0, 2
+; W64-O0-NEXT:    v_readlane_b32 s4, v17, 1
+; W64-O0-NEXT:    v_readlane_b32 s5, v17, 2
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 9
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 10
+; W64-O0-NEXT:    v_writelane_b32 v17, s4, 9
+; W64-O0-NEXT:    v_writelane_b32 v17, s5, 10
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  .LBB1_4: ; =>This Inner Loop Header: Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT:    v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s8, v0
+; W64-O0-NEXT:    v_readfirstlane_b32 s12, v1
 ; W64-O0-NEXT:    s_mov_b32 s4, s8
 ; W64-O0-NEXT:    s_mov_b32 s5, s12
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
-; W64-O0-NEXT:    v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT:    v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT:    v_readfirstlane_b32 s7, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s6, v3
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[10:11]
 ; W64-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
 ; W64-O0-NEXT:    s_mov_b32 s9, s12
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_writelane_b32 v0, s8, 11
-; W64-O0-NEXT:    v_writelane_b32 v0, s9, 12
-; W64-O0-NEXT:    v_writelane_b32 v0, s10, 13
-; W64-O0-NEXT:    v_writelane_b32 v0, s11, 14
+; W64-O0-NEXT:    v_writelane_b32 v17, s8, 11
+; W64-O0-NEXT:    v_writelane_b32 v17, s9, 12
+; W64-O0-NEXT:    v_writelane_b32 v17, s10, 13
+; W64-O0-NEXT:    v_writelane_b32 v17, s11, 14
 ; W64-O0-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 15
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 16
+; W64-O0-NEXT:    v_writelane_b32 v17, s4, 15
+; W64-O0-NEXT:    v_writelane_b32 v17, s5, 16
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.5: ; in Loop: Header=BB1_4 Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 15
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 16
-; W64-O0-NEXT:    v_readlane_b32 s8, v1, 11
-; W64-O0-NEXT:    v_readlane_b32 s9, v1, 12
-; W64-O0-NEXT:    v_readlane_b32 s10, v1, 13
-; W64-O0-NEXT:    v_readlane_b32 s11, v1, 14
-; W64-O0-NEXT:    v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_readlane_b32 s4, v17, 15
+; W64-O0-NEXT:    v_readlane_b32 s5, v17, 16
+; W64-O0-NEXT:    v_readlane_b32 s8, v17, 11
+; W64-O0-NEXT:    v_readlane_b32 s9, v17, 12
+; W64-O0-NEXT:    v_readlane_b32 s10, v17, 13
+; W64-O0-NEXT:    v_readlane_b32 s11, v17, 14
+; W64-O0-NEXT:    v_readlane_b32 s6, v17, 0
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_nop 2
 ; W64-O0-NEXT:    buffer_load_format_x v0, v0, s[8:11], s6 idxen
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_xor_b64 exec, exec, s[4:5]
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB1_4
 ; W64-O0-NEXT:  ; %bb.6:
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 9
-; W64-O0-NEXT:    v_readlane_b32 s5, v0, 10
+; W64-O0-NEXT:    v_readlane_b32 s4, v17, 9
+; W64-O0-NEXT:    v_readlane_b32 s5, v17, 10
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    global_store_dword v[4:5], v6, off
+; W64-O0-NEXT:    global_store_dword v[3:4], v5, off
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    global_store_dword v[1:2], v3, off
+; W64-O0-NEXT:    global_store_dword v[0:1], v2, off
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    ; kill: killed $vgpr0
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -1058,48 +1042,42 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0:       ; %bb.0: ; %entry
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
 ; W64-O0-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; W64-O0-NEXT:    v_mov_b32_e32 v6, v5
-; W64-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v4
+; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_nop 0
-; W64-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; W64-O0-NEXT:    v_mov_b32_e32 v4, v3
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; W64-O0-NEXT:    v_mov_b32_e32 v13, v2
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; W64-O0-NEXT:    v_mov_b32_e32 v10, v1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; W64-O0-NEXT:    v_mov_b32_e32 v9, v2
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_mov_b32_e32 v6, v1
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    v_mov_b32_e32 v8, v0
-; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v14, v4
-; W64-O0-NEXT:    v_mov_b32_e32 v4, v14
-; W64-O0-NEXT:    v_mov_b32_e32 v6, v13
+; W64-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v10, v3
+; W64-O0-NEXT:    v_mov_b32_e32 v3, v10
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v9
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v9, v10
-; W64-O0-NEXT:    v_mov_b32_e32 v13, v9
+; W64-O0-NEXT:    v_mov_b32_e32 v9, v6
+; W64-O0-NEXT:    v_mov_b32_e32 v6, v9
 ; W64-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v9, v13
-; W64-O0-NEXT:    v_mov_b32_e32 v10, v6
-; W64-O0-NEXT:    v_mov_b32_e32 v11, v4
+; W64-O0-NEXT:    v_mov_b32_e32 v9, v6
+; W64-O0-NEXT:    v_mov_b32_e32 v10, v5
+; W64-O0-NEXT:    v_mov_b32_e32 v11, v3
 ; W64-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
@@ -1107,88 +1085,89 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v6, v7
+; W64-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v7
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v4, v2
+; W64-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v3, v1
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4
-; W64-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v2, v12
-; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v1, v12
+; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; W64-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    ;;#ASMSTART
 ; W64-O0-NEXT:    s_mov_b32 s4, 17
 ; W64-O0-NEXT:    ;;#ASMEND
 ; W64-O0-NEXT:    s_mov_b32 s5, s4
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 0
+; W64-O0-NEXT:    ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 0
 ; W64-O0-NEXT:    s_mov_b32 s5, 0
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 1
-; W64-O0-NEXT:    v_mov_b32_e32 v1, s4
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 1
+; W64-O0-NEXT:    v_mov_b32_e32 v0, s4
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 2
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 3
+; W64-O0-NEXT:    v_writelane_b32 v13, s4, 2
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 3
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT:    v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s8, v0
+; W64-O0-NEXT:    v_readfirstlane_b32 s12, v1
 ; W64-O0-NEXT:    s_mov_b32 s4, s8
 ; W64-O0-NEXT:    s_mov_b32 s5, s12
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
-; W64-O0-NEXT:    v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT:    v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT:    v_readfirstlane_b32 s7, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s6, v3
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[10:11]
 ; W64-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
 ; W64-O0-NEXT:    s_mov_b32 s9, s12
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_writelane_b32 v0, s8, 4
-; W64-O0-NEXT:    v_writelane_b32 v0, s9, 5
-; W64-O0-NEXT:    v_writelane_b32 v0, s10, 6
-; W64-O0-NEXT:    v_writelane_b32 v0, s11, 7
+; W64-O0-NEXT:    v_writelane_b32 v13, s8, 4
+; W64-O0-NEXT:    v_writelane_b32 v13, s9, 5
+; W64-O0-NEXT:    v_writelane_b32 v13, s10, 6
+; W64-O0-NEXT:    v_writelane_b32 v13, s11, 7
 ; W64-O0-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 8
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 9
+; W64-O0-NEXT:    v_writelane_b32 v13, s4, 8
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 9
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB2_1 Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 8
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 9
-; W64-O0-NEXT:    v_readlane_b32 s8, v1, 4
-; W64-O0-NEXT:    v_readlane_b32 s9, v1, 5
-; W64-O0-NEXT:    v_readlane_b32 s10, v1, 6
-; W64-O0-NEXT:    v_readlane_b32 s11, v1, 7
-; W64-O0-NEXT:    v_readlane_b32 s6, v1, 1
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 8
+; W64-O0-NEXT:    v_readlane_b32 s5, v13, 9
+; W64-O0-NEXT:    v_readlane_b32 s8, v13, 4
+; W64-O0-NEXT:    v_readlane_b32 s9, v13, 5
+; W64-O0-NEXT:    v_readlane_b32 s10, v13, 6
+; W64-O0-NEXT:    v_readlane_b32 s11, v13, 7
+; W64-O0-NEXT:    v_readlane_b32 s6, v13, 1
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_nop 2
@@ -1199,113 +1178,113 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB2_1
 ; W64-O0-NEXT:  ; %bb.3:
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s6, v0, 2
-; W64-O0-NEXT:    v_readlane_b32 s7, v0, 3
+; W64-O0-NEXT:    v_readlane_b32 s6, v13, 2
+; W64-O0-NEXT:    v_readlane_b32 s7, v13, 3
 ; W64-O0-NEXT:    s_mov_b64 exec, s[6:7]
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 1
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b32 s5, 0x3ff
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_and_b32_e64 v2, v2, s5
-; W64-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v2, s4
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; W64-O0-NEXT:    v_and_b32_e64 v1, v1, s5
+; W64-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v1, s4
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 10
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 11
+; W64-O0-NEXT:    v_writelane_b32 v13, s4, 10
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 11
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_cbranch_execz .LBB2_8
 ; W64-O0-NEXT:  ; %bb.4: ; %bb1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 0
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 0
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_mov_b32_e32 v7, v5
-; W64-O0-NEXT:    v_mov_b32_e32 v1, v4
-; W64-O0-NEXT:    v_mov_b32_e32 v5, v3
-; W64-O0-NEXT:    v_mov_b32_e32 v6, v2
+; W64-O0-NEXT:    v_mov_b32_e32 v6, v4
+; W64-O0-NEXT:    v_mov_b32_e32 v0, v3
+; W64-O0-NEXT:    v_mov_b32_e32 v4, v2
+; W64-O0-NEXT:    v_mov_b32_e32 v5, v1
 ; W64-O0-NEXT:    ; implicit-def: $sgpr5
 ; W64-O0-NEXT:    ; implicit-def: $sgpr5
 ; W64-O0-NEXT:    ; implicit-def: $sgpr5
 ; W64-O0-NEXT:    ; implicit-def: $sgpr5
-; W64-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT:    v_mov_b32_e32 v2, v7
-; W64-O0-NEXT:    v_mov_b32_e32 v3, v6
-; W64-O0-NEXT:    v_mov_b32_e32 v4, v5
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; W64-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT:    v_mov_b32_e32 v1, v6
+; W64-O0-NEXT:    v_mov_b32_e32 v2, v5
+; W64-O0-NEXT:    v_mov_b32_e32 v3, v4
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; W64-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b32 s5, 0
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 12
-; W64-O0-NEXT:    v_mov_b32_e32 v1, s4
-; W64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 12
+; W64-O0-NEXT:    v_mov_b32_e32 v0, s4
+; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 13
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 14
+; W64-O0-NEXT:    v_writelane_b32 v13, s4, 13
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 14
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  .LBB2_5: ; =>This Inner Loop Header: Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT:    v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s8, v0
+; W64-O0-NEXT:    v_readfirstlane_b32 s12, v1
 ; W64-O0-NEXT:    s_mov_b32 s4, s8
 ; W64-O0-NEXT:    s_mov_b32 s5, s12
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
-; W64-O0-NEXT:    v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT:    v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT:    v_readfirstlane_b32 s7, v2
+; W64-O0-NEXT:    v_readfirstlane_b32 s6, v3
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
 ; W64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[10:11]
 ; W64-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
 ; W64-O0-NEXT:    s_mov_b32 s9, s12
 ; W64-O0-NEXT:    s_mov_b32 s10, s7
 ; W64-O0-NEXT:    s_mov_b32 s11, s6
-; W64-O0-NEXT:    v_writelane_b32 v0, s8, 15
-; W64-O0-NEXT:    v_writelane_b32 v0, s9, 16
-; W64-O0-NEXT:    v_writelane_b32 v0, s10, 17
-; W64-O0-NEXT:    v_writelane_b32 v0, s11, 18
+; W64-O0-NEXT:    v_writelane_b32 v13, s8, 15
+; W64-O0-NEXT:    v_writelane_b32 v13, s9, 16
+; W64-O0-NEXT:    v_writelane_b32 v13, s10, 17
+; W64-O0-NEXT:    v_writelane_b32 v13, s11, 18
 ; W64-O0-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT:    v_writelane_b32 v0, s4, 19
-; W64-O0-NEXT:    v_writelane_b32 v0, s5, 20
+; W64-O0-NEXT:    v_writelane_b32 v13, s4, 19
+; W64-O0-NEXT:    v_writelane_b32 v13, s5, 20
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:  ; %bb.6: ; in Loop: Header=BB2_5 Depth=1
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 19
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 20
-; W64-O0-NEXT:    v_readlane_b32 s8, v1, 15
-; W64-O0-NEXT:    v_readlane_b32 s9, v1, 16
-; W64-O0-NEXT:    v_readlane_b32 s10, v1, 17
-; W64-O0-NEXT:    v_readlane_b32 s11, v1, 18
-; W64-O0-NEXT:    v_readlane_b32 s6, v1, 12
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 19
+; W64-O0-NEXT:    v_readlane_b32 s5, v13, 20
+; W64-O0-NEXT:    v_readlane_b32 s8, v13, 15
+; W64-O0-NEXT:    v_readlane_b32 s9, v13, 16
+; W64-O0-NEXT:    v_readlane_b32 s10, v13, 17
+; W64-O0-NEXT:    v_readlane_b32 s11, v13, 18
+; W64-O0-NEXT:    v_readlane_b32 s6, v13, 12
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_nop 2
@@ -1316,33 +1295,31 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0-NEXT:    s_cbranch_execnz .LBB2_5
 ; W64-O0-NEXT:  ; %bb.7:
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v1, 13
-; W64-O0-NEXT:    v_readlane_b32 s5, v1, 14
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 13
+; W64-O0-NEXT:    v_readlane_b32 s5, v13, 14
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; W64-O0-NEXT:  .LBB2_8: ; %bb2
 ; W64-O0-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    v_readlane_b32 s4, v0, 10
-; W64-O0-NEXT:    v_readlane_b32 s5, v0, 11
+; W64-O0-NEXT:    v_readlane_b32 s4, v13, 10
+; W64-O0-NEXT:    v_readlane_b32 s5, v13, 11
 ; W64-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    global_store_dword v[1:2], v3, off
+; W64-O0-NEXT:    global_store_dword v[0:1], v2, off
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
-; W64-O0-NEXT:    ; kill: killed $vgpr0
 ; W64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
-; W64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; W64-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
 ; W64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; W64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; W64-O0-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 45fbaaabc65b5..5a9b059fb26dd 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -11,21 +11,17 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; REGALLOC-GFX908-NEXT:   liveins: $sgpr4_sgpr5
   ; REGALLOC-GFX908-NEXT: {{  $}}
   ; REGALLOC-GFX908-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
-  ; REGALLOC-GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128 */, def %26
-  ; REGALLOC-GFX908-NEXT:   [[COPY:%[0-9]+]]:av_128 = COPY %26
-  ; REGALLOC-GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %23
-  ; REGALLOC-GFX908-NEXT:   SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
-  ; REGALLOC-GFX908-NEXT:   [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]]
-  ; REGALLOC-GFX908-NEXT:   GLOBAL_STORE_DWORDX4 undef %14:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
+  ; REGALLOC-GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128 */, def %6
+  ; REGALLOC-GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %7
+  ; REGALLOC-GFX908-NEXT:   GLOBAL_STORE_DWORDX4 undef %14:vreg_64, %6, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
   ; REGALLOC-GFX908-NEXT:   renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
-  ; REGALLOC-GFX908-NEXT:   [[COPY2:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
+  ; REGALLOC-GFX908-NEXT:   [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
   ; REGALLOC-GFX908-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
   ; REGALLOC-GFX908-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
-  ; REGALLOC-GFX908-NEXT:   [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
-  ; REGALLOC-GFX908-NEXT:   [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
-  ; REGALLOC-GFX908-NEXT:   GLOBAL_STORE_DWORDX2 undef %16:vreg_64, [[SI_SPILL_V64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
-  ; REGALLOC-GFX908-NEXT:   [[COPY3:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
-  ; REGALLOC-GFX908-NEXT:   GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY3]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
+  ; REGALLOC-GFX908-NEXT:   [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+  ; REGALLOC-GFX908-NEXT:   GLOBAL_STORE_DWORDX2 undef %16:vreg_64, %7, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
+  ; REGALLOC-GFX908-NEXT:   [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
+  ; REGALLOC-GFX908-NEXT:   GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
   ; REGALLOC-GFX908-NEXT:   S_ENDPGM 0
   ;
   ; PEI-GFX908-LABEL: name: partial_copy
@@ -60,18 +56,15 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; REGALLOC-GFX90A-NEXT:   liveins: $sgpr4_sgpr5
   ; REGALLOC-GFX90A-NEXT: {{  $}}
   ; REGALLOC-GFX90A-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
-  ; REGALLOC-GFX90A-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6553610 /* regdef:VReg_128_Align2 */, def %25
-  ; REGALLOC-GFX90A-NEXT:   [[COPY:%[0-9]+]]:av_128_align2 = COPY %25
-  ; REGALLOC-GFX90A-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %23
-  ; REGALLOC-GFX90A-NEXT:   SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
-  ; REGALLOC-GFX90A-NEXT:   GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
+  ; REGALLOC-GFX90A-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6553610 /* regdef:VReg_128_Align2 */, def %6
+  ; REGALLOC-GFX90A-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %7
+  ; REGALLOC-GFX90A-NEXT:   GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, %6, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
   ; REGALLOC-GFX90A-NEXT:   renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
-  ; REGALLOC-GFX90A-NEXT:   [[COPY1:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
+  ; REGALLOC-GFX90A-NEXT:   [[COPY:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
   ; REGALLOC-GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
   ; REGALLOC-GFX90A-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
-  ; REGALLOC-GFX90A-NEXT:   [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
-  ; REGALLOC-GFX90A-NEXT:   [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64_align2 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
-  ; REGALLOC-GFX90A-NEXT:   GLOBAL_STORE_DWORDX2 undef %16:vreg_64_align2, [[SI_SPILL_AV64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
+  ; REGALLOC-GFX90A-NEXT:   [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+  ; REGALLOC-GFX90A-NEXT:   GLOBAL_STORE_DWORDX2 undef %16:vreg_64_align2, %7, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
   ; REGALLOC-GFX90A-NEXT:   GLOBAL_STORE_DWORDX4 undef %18:vreg_64_align2, [[V_MFMA_I32_4X4X4I8_e64_]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
   ; REGALLOC-GFX90A-NEXT:   S_ENDPGM 0
   ;
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index 5b0354e63c236..e00eb0a582e63 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -O0 -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
+; XFAIL: *
+
 ; FIXME: we should disable sdwa peephole because dead-code elimination, that
 ; runs after peephole, ruins this test (different register numbers)
 
diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
index 8e2a56b463c40..11280b5008b52 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
@@ -61,35 +61,27 @@ machineFunctionInfo:
   isChainFunction: true
   returnsVoid:     true
   wwmReservedRegs:
-    - '$vgpr11'
+    - '$vgpr10'
 body:             |
   bb.0:
     liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
 
     ; GCN-LABEL: name: preserve_all_lanes_wwm_above_args
-    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
+    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr11, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
-    ; GCN-NEXT: renamable $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
     ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
-    ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0
-    ; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
-    ; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10
+    ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
+    ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 10, implicit $exec
+    ; GCN-NEXT: $vgpr8 = COPY killed $vgpr0
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
-    ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $vgpr11 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
-    renamable $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
+    $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
     $sgpr35 = S_MOV_B32 5
     $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0
-    renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
-    $vgpr8 = COPY renamable killed $vgpr10
+    $vgpr10 = V_MOV_B32_e32 10, implicit $exec
+    $vgpr8 = COPY killed $vgpr10
     renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
     SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
@@ -139,23 +131,15 @@ body:             |
     liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
 
     ; GCN-LABEL: name: preserve_inactive_lanes_wwm_args
-    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
+    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr10
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
     ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
     ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
     ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
-    ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr9(tied-def 0) :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
-    ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
+    ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr0
     renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
     $sgpr35 = S_MOV_B32 5
     $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
@@ -216,7 +200,7 @@ body:             |
     liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8
 
     ; GCN-LABEL: name: dont_preserve_v0_v7
-    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8
+    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr1, $vgpr8
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
     ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
index 765597fecd20e..e0a3f9f597c4b 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
@@ -35,19 +35,11 @@ body:             |
     liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
 
     ; GCN-LABEL: name: preserve_inactive_wwm
-    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
+    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
-    ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
-    ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
+    ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr0
     renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
     SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
@@ -71,10 +63,6 @@ body:             |
     ; GCN-LABEL: name: preserve_inactive_detected_wwm
     ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
     ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
     ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
@@ -85,10 +73,6 @@ body:             |
     ; GCN-NEXT: renamable $vgpr9 = V_MOV_B32_e32 10, implicit $exec
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
-    ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
     renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
     $sgpr35 = S_MOV_B32 5
@@ -178,7 +162,7 @@ body:             |
     liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
 
     ; GCN-LABEL: name: dont_preserve_v0_v7
-    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
+    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr1, $vgpr7, $vgpr8, $vgpr9
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
     ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
diff --git a/llvm/test/CodeGen/AMDGPU/pr51516.mir b/llvm/test/CodeGen/AMDGPU/pr51516.mir
index b21285e83dc21..4be102f7860ea 100644
--- a/llvm/test/CodeGen/AMDGPU/pr51516.mir
+++ b/llvm/test/CodeGen/AMDGPU/pr51516.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs -start-before=machine-scheduler -stop-after=virtregrewriter,1 -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs -start-before=machine-scheduler -stop-after=virtregrewriter,2 -o - %s | FileCheck -check-prefix=GCN %s
 
 # Check that %3 was not rematerialized before the last store since its operand %1
 # is killed by that store.
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir
index 4571e792c7cb5..9d3fd8f80a116 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir
+++ b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir
@@ -20,16 +20,10 @@ body:             |
     ; GCN-LABEL: name: preserve_scratch_vgpr_inactive_lanes
     ; GCN: liveins: $sgpr35, $vgpr0, $sgpr30_sgpr31
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
     ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
     ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
     ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
-    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: S_SETPC_B64_return killed renamable $sgpr30_sgpr31, implicit $vgpr0
     renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
     $sgpr35 = S_MOV_B32 5
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
index fbe34a3a3970b..8b30e1cae60fc 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
@@ -13,333 +13,333 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
 ; GFX906-NEXT:    s_mov_b32 s16, s33
 ; GFX906-NEXT:    s_mov_b32 s33, s32
 ; GFX906-NEXT:    s_xor_saveexec_b64 s[18:19], -1
-; GFX906-NEXT:    buffer_store_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX906-NEXT:    s_mov_b64 exec, -1
-; GFX906-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
 ; GFX906-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX906-NEXT:    ; implicit-def: $vgpr2
+; GFX906-NEXT:    ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
 ; GFX906-NEXT:    s_mov_b32 s21, s15
-; GFX906-NEXT:    v_writelane_b32 v2, s6, 0
-; GFX906-NEXT:    v_writelane_b32 v2, s7, 1
-; GFX906-NEXT:    v_writelane_b32 v2, s21, 2
+; GFX906-NEXT:    v_writelane_b32 v39, s6, 0
+; GFX906-NEXT:    v_writelane_b32 v39, s7, 1
+; GFX906-NEXT:    v_writelane_b32 v39, s21, 2
 ; GFX906-NEXT:    s_mov_b32 s22, s14
-; GFX906-NEXT:    v_writelane_b32 v2, s22, 3
+; GFX906-NEXT:    v_writelane_b32 v39, s22, 3
 ; GFX906-NEXT:    s_mov_b32 s23, s13
-; GFX906-NEXT:    v_writelane_b32 v2, s23, 4
+; GFX906-NEXT:    v_writelane_b32 v39, s23, 4
 ; GFX906-NEXT:    s_mov_b32 s24, s12
-; GFX906-NEXT:    v_writelane_b32 v2, s24, 5
+; GFX906-NEXT:    v_writelane_b32 v39, s24, 5
 ; GFX906-NEXT:    s_mov_b64 s[26:27], s[10:11]
-; GFX906-NEXT:    v_writelane_b32 v2, s26, 6
+; GFX906-NEXT:    v_writelane_b32 v39, s26, 6
 ; GFX906-NEXT:    v_writelane_b32 v41, s16, 4
-; GFX906-NEXT:    v_writelane_b32 v2, s27, 7
+; GFX906-NEXT:    v_writelane_b32 v39, s27, 7
 ; GFX906-NEXT:    v_writelane_b32 v41, s34, 2
-; GFX906-NEXT:    v_writelane_b32 v2, s8, 8
+; GFX906-NEXT:    v_writelane_b32 v39, s8, 8
 ; GFX906-NEXT:    v_writelane_b32 v41, s35, 3
-; GFX906-NEXT:    v_writelane_b32 v2, s9, 9
+; GFX906-NEXT:    v_writelane_b32 v39, s9, 9
 ; GFX906-NEXT:    v_writelane_b32 v41, s30, 0
-; GFX906-NEXT:    v_writelane_b32 v2, s4, 10
+; GFX906-NEXT:    v_writelane_b32 v39, s4, 10
 ; GFX906-NEXT:    s_addk_i32 s32, 0x2800
+; GFX906-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GFX906-NEXT:    v_writelane_b32 v41, s31, 1
 ; GFX906-NEXT:    v_mov_b32_e32 v32, v31
-; GFX906-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
-; GFX906-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; GFX906-NEXT:    v_writelane_b32 v2, s5, 11
+; GFX906-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GFX906-NEXT:    v_writelane_b32 v39, s5, 11
 ; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GFX906-NEXT:    v_mov_b32_e32 v33, v2
 ; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def v[0:31]
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
-; GFX906-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v7, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v8, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v10, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v11, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v12, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v13, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v14, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v15, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v16, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v17, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v18, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v19, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v20, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v21, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v22, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v23, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v24, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v25, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v26, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v27, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v28, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v29, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v30, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
-; GFX906-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def v40
 ; GFX906-NEXT:    ;;#ASMEND
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s11
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GFX906-NEXT:    v_mov_b32_e32 v40, v33
-; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX906-NEXT:    v_writelane_b32 v40, s11, 12
+; GFX906-NEXT:    v_writelane_b32 v39, s11, 12
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s12
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_writelane_b32 v40, s12, 13
+; GFX906-NEXT:    v_writelane_b32 v39, s12, 13
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s13
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_writelane_b32 v40, s13, 14
+; GFX906-NEXT:    v_writelane_b32 v39, s13, 14
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s14
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_writelane_b32 v40, s14, 15
+; GFX906-NEXT:    v_writelane_b32 v39, s14, 15
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s15
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_writelane_b32 v40, s15, 16
+; GFX906-NEXT:    v_writelane_b32 v39, s15, 16
 ; GFX906-NEXT:    s_getpc_b64 s[10:11]
; GFX906-NEXT:    s_add_u32 s10, s10, foo@gotpcrel32@lo+4
; GFX906-NEXT:    s_addc_u32 s11, s11, foo@gotpcrel32@hi+12
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s16
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_writelane_b32 v40, s16, 17
+; GFX906-NEXT:    v_writelane_b32 v39, s16, 17
 ; GFX906-NEXT:    s_load_dwordx2 s[10:11], s[10:11], 0x0
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s17
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_writelane_b32 v40, s17, 18
+; GFX906-NEXT:    v_writelane_b32 v39, s17, 18
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s18
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_writelane_b32 v40, s18, 19
+; GFX906-NEXT:    v_writelane_b32 v39, s18, 19
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s19
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_writelane_b32 v40, s19, 20
+; GFX906-NEXT:    v_writelane_b32 v39, s19, 20
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s20
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_writelane_b32 v40, s20, 21
+; GFX906-NEXT:    v_writelane_b32 v39, s20, 21
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX906-NEXT:    v_writelane_b32 v40, s10, 22
-; GFX906-NEXT:    v_writelane_b32 v40, s11, 23
+; GFX906-NEXT:    v_writelane_b32 v39, s10, 22
+; GFX906-NEXT:    v_writelane_b32 v39, s11, 23
+; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GFX906-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX906-NEXT:    v_readlane_b32 s16, v40, 22
+; GFX906-NEXT:    v_readlane_b32 s16, v39, 22
 ; GFX906-NEXT:    s_mov_b32 s12, s24
 ; GFX906-NEXT:    s_mov_b32 s13, s23
 ; GFX906-NEXT:    s_mov_b32 s14, s22
 ; GFX906-NEXT:    v_mov_b32_e32 v31, v32
 ; GFX906-NEXT:    s_mov_b32 s15, s21
 ; GFX906-NEXT:    s_mov_b64 s[10:11], s[26:27]
-; GFX906-NEXT:    v_readlane_b32 s17, v40, 23
-; GFX906-NEXT:    buffer_store_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX906-NEXT:    v_readlane_b32 s17, v39, 23
+; GFX906-NEXT:    v_mov_b32_e32 v40, v32
 ; GFX906-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GFX906-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX906-NEXT:    v_readlane_b32 s11, v40, 12
+; GFX906-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-NEXT:    v_readlane_b32 s11, v39, 12
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s11
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s12, v40, 13
+; GFX906-NEXT:    v_readlane_b32 s12, v39, 13
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s12
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s13, v40, 14
+; GFX906-NEXT:    v_readlane_b32 s13, v39, 14
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s13
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s14, v40, 15
+; GFX906-NEXT:    v_readlane_b32 s14, v39, 15
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s14
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s15, v40, 16
+; GFX906-NEXT:    v_readlane_b32 s15, v39, 16
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s15
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s16, v40, 17
+; GFX906-NEXT:    v_readlane_b32 s16, v39, 17
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s16
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s17, v40, 18
+; GFX906-NEXT:    v_readlane_b32 s17, v39, 18
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s17
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s18, v40, 19
+; GFX906-NEXT:    v_readlane_b32 s18, v39, 19
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s18
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s19, v40, 20
+; GFX906-NEXT:    v_readlane_b32 s19, v39, 20
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s19
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s20, v40, 21
+; GFX906-NEXT:    v_readlane_b32 s20, v39, 21
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s20
 ; GFX906-NEXT:    ;;#ASMEND
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s21
 ; GFX906-NEXT:    ;;#ASMEND
+; GFX906-NEXT:    v_writelane_b32 v39, s21, 24
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s22
 ; GFX906-NEXT:    ;;#ASMEND
+; GFX906-NEXT:    v_writelane_b32 v39, s22, 25
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s23
 ; GFX906-NEXT:    ;;#ASMEND
+; GFX906-NEXT:    v_writelane_b32 v39, s23, 26
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s24
 ; GFX906-NEXT:    ;;#ASMEND
+; GFX906-NEXT:    v_writelane_b32 v39, s24, 27
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s25
 ; GFX906-NEXT:    ;;#ASMEND
+; GFX906-NEXT:    v_writelane_b32 v39, s25, 28
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s26
 ; GFX906-NEXT:    ;;#ASMEND
+; GFX906-NEXT:    v_writelane_b32 v39, s26, 29
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s27
 ; GFX906-NEXT:    ;;#ASMEND
+; GFX906-NEXT:    v_writelane_b32 v39, s27, 30
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s28
 ; GFX906-NEXT:    ;;#ASMEND
+; GFX906-NEXT:    v_writelane_b32 v39, s28, 31
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; def s29
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX906-NEXT:    v_writelane_b32 v40, s21, 24
-; GFX906-NEXT:    v_writelane_b32 v40, s22, 25
-; GFX906-NEXT:    v_writelane_b32 v40, s23, 26
-; GFX906-NEXT:    v_writelane_b32 v40, s24, 27
-; GFX906-NEXT:    v_writelane_b32 v40, s25, 28
-; GFX906-NEXT:    v_writelane_b32 v40, s26, 29
-; GFX906-NEXT:    v_writelane_b32 v40, s27, 30
-; GFX906-NEXT:    v_writelane_b32 v40, s28, 31
-; GFX906-NEXT:    v_writelane_b32 v40, s29, 32
-; GFX906-NEXT:    v_readlane_b32 s4, v40, 10
-; GFX906-NEXT:    v_readlane_b32 s6, v40, 0
-; GFX906-NEXT:    v_readlane_b32 s8, v40, 8
-; GFX906-NEXT:    v_readlane_b32 s10, v40, 6
-; GFX906-NEXT:    v_readlane_b32 s16, v40, 22
-; GFX906-NEXT:    v_readlane_b32 s12, v40, 5
-; GFX906-NEXT:    v_readlane_b32 s13, v40, 4
-; GFX906-NEXT:    v_readlane_b32 s14, v40, 3
-; GFX906-NEXT:    v_readlane_b32 s15, v40, 2
-; GFX906-NEXT:    v_readlane_b32 s5, v40, 11
-; GFX906-NEXT:    v_readlane_b32 s7, v40, 1
-; GFX906-NEXT:    v_readlane_b32 s9, v40, 9
-; GFX906-NEXT:    v_readlane_b32 s11, v40, 7
-; GFX906-NEXT:    v_readlane_b32 s17, v40, 23
+; GFX906-NEXT:    v_writelane_b32 v39, s29, 32
 ; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GFX906-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
+; GFX906-NEXT:    v_readlane_b32 s4, v39, 10
+; GFX906-NEXT:    v_readlane_b32 s6, v39, 0
+; GFX906-NEXT:    v_readlane_b32 s8, v39, 8
+; GFX906-NEXT:    v_readlane_b32 s10, v39, 6
+; GFX906-NEXT:    v_readlane_b32 s16, v39, 22
+; GFX906-NEXT:    v_readlane_b32 s12, v39, 5
+; GFX906-NEXT:    v_mov_b32_e32 v31, v40
+; GFX906-NEXT:    v_readlane_b32 s13, v39, 4
+; GFX906-NEXT:    v_readlane_b32 s14, v39, 3
+; GFX906-NEXT:    v_readlane_b32 s15, v39, 2
+; GFX906-NEXT:    v_readlane_b32 s5, v39, 11
+; GFX906-NEXT:    v_readlane_b32 s7, v39, 1
+; GFX906-NEXT:    v_readlane_b32 s9, v39, 9
+; GFX906-NEXT:    v_readlane_b32 s11, v39, 7
+; GFX906-NEXT:    v_readlane_b32 s17, v39, 23
 ; GFX906-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GFX906-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX906-NEXT:    v_readlane_b32 s21, v40, 24
+; GFX906-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-NEXT:    v_readlane_b32 s4, v39, 10
+; GFX906-NEXT:    v_readlane_b32 s6, v39, 0
+; GFX906-NEXT:    v_readlane_b32 s8, v39, 8
+; GFX906-NEXT:    v_readlane_b32 s10, v39, 6
+; GFX906-NEXT:    v_readlane_b32 s16, v39, 22
+; GFX906-NEXT:    v_readlane_b32 s5, v39, 11
+; GFX906-NEXT:    v_readlane_b32 s7, v39, 1
+; GFX906-NEXT:    v_readlane_b32 s9, v39, 9
+; GFX906-NEXT:    v_readlane_b32 s11, v39, 7
+; GFX906-NEXT:    v_readlane_b32 s12, v39, 5
+; GFX906-NEXT:    v_readlane_b32 s13, v39, 4
+; GFX906-NEXT:    v_readlane_b32 s14, v39, 3
+; GFX906-NEXT:    v_readlane_b32 s15, v39, 2
+; GFX906-NEXT:    v_mov_b32_e32 v31, v40
+; GFX906-NEXT:    v_readlane_b32 s17, v39, 23
+; GFX906-NEXT:    v_readlane_b32 s21, v39, 24
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s21
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s22, v40, 25
+; GFX906-NEXT:    v_readlane_b32 s22, v39, 25
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s22
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s23, v40, 26
+; GFX906-NEXT:    v_readlane_b32 s23, v39, 26
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s23
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s24, v40, 27
+; GFX906-NEXT:    v_readlane_b32 s24, v39, 27
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s24
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s25, v40, 28
+; GFX906-NEXT:    v_readlane_b32 s25, v39, 28
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s25
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s26, v40, 29
+; GFX906-NEXT:    v_readlane_b32 s26, v39, 29
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s26
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s27, v40, 30
+; GFX906-NEXT:    v_readlane_b32 s27, v39, 30
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s27
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s28, v40, 31
+; GFX906-NEXT:    v_readlane_b32 s28, v39, 31
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s28
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    v_readlane_b32 s29, v40, 32
+; GFX906-NEXT:    v_readlane_b32 s29, v39, 32
 ; GFX906-NEXT:    ;;#ASMSTART
 ; GFX906-NEXT:    ; use s29
 ; GFX906-NEXT:    ;;#ASMEND
-; GFX906-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX906-NEXT:    v_readlane_b32 s4, v40, 10
-; GFX906-NEXT:    v_readlane_b32 s6, v40, 0
-; GFX906-NEXT:    v_readlane_b32 s8, v40, 8
-; GFX906-NEXT:    v_readlane_b32 s10, v40, 6
-; GFX906-NEXT:    v_readlane_b32 s16, v40, 22
-; GFX906-NEXT:    v_readlane_b32 s5, v40, 11
-; GFX906-NEXT:    v_readlane_b32 s7, v40, 1
-; GFX906-NEXT:    v_readlane_b32 s9, v40, 9
-; GFX906-NEXT:    v_readlane_b32 s11, v40, 7
-; GFX906-NEXT:    v_readlane_b32 s12, v40, 5
-; GFX906-NEXT:    v_readlane_b32 s13, v40, 4
-; GFX906-NEXT:    v_readlane_b32 s14, v40, 3
-; GFX906-NEXT:    v_readlane_b32 s15, v40, 2
-; GFX906-NEXT:    v_readlane_b32 s17, v40, 23
-; GFX906-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GFX906-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX906-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GFX906-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v10, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v11, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v12, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v13, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v14, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v15, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v16, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v17, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v18, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v19, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v20, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v21, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v22, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v23, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v24, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v25, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v26, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v27, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v28, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v29, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v10, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v11, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v12, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v13, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v14, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v15, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v16, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v17, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v18, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v19, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v20, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v21, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v22, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v23, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v24, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v25, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v26, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v27, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v28, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v29, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
 ; GFX906-NEXT:    v_readlane_b32 s31, v41, 1
 ; GFX906-NEXT:    v_readlane_b32 s30, v41, 0
-; GFX906-NEXT:    ; kill: killed $vgpr40
 ; GFX906-NEXT:    v_readlane_b32 s4, v41, 4
 ; GFX906-NEXT:    v_readlane_b32 s34, v41, 2
 ; GFX906-NEXT:    v_readlane_b32 s35, v41, 3
@@ -360,12 +360,11 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
 ; GFX906-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
 ; GFX906-NEXT:    s_xor_saveexec_b64 s[6:7], -1
-; GFX906-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; GFX906-NEXT:    s_mov_b64 exec, -1
-; GFX906-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX906-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
 ; GFX906-NEXT:    s_mov_b64 exec, s[6:7]
 ; GFX906-NEXT:    s_addk_i32 s32, 0xd800
 ; GFX906-NEXT:    s_mov_b32 s33, s4
@@ -378,346 +377,346 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
 ; GFX908-NEXT:    s_mov_b32 s16, s33
 ; GFX908-NEXT:    s_mov_b32 s33, s32
 ; GFX908-NEXT:    s_xor_saveexec_b64 s[18:19], -1
-; GFX908-NEXT:    buffer_store_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
-; GFX908-NEXT:    s_mov_b64 exec, -1
-; GFX908-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
 ; GFX908-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX908-NEXT:    v_mov_b32_e32 v3, s16
-; GFX908-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_mov_b32_e32 v3, s34
-; GFX908-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_mov_b32_e32 v3, s35
-; GFX908-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_mov_b32_e32 v2, s16
+; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_mov_b32_e32 v2, s34
+; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_mov_b32_e32 v2, s35
+; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
 ; GFX908-NEXT:    s_addk_i32 s32, 0x2c00
+; GFX908-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GFX908-NEXT:    s_mov_b64 s[16:17], exec
 ; GFX908-NEXT:    s_mov_b64 exec, 1
-; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:172
+; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    v_writelane_b32 v2, s30, 0
-; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:172
+; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    s_mov_b64 exec, s[16:17]
 ; GFX908-NEXT:    s_mov_b64 s[16:17], exec
 ; GFX908-NEXT:    s_mov_b64 exec, 1
-; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:172
+; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    v_writelane_b32 v2, s31, 0
-; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:172
+; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    s_mov_b64 exec, s[16:17]
-; GFX908-NEXT:    ; implicit-def: $vgpr2
+; GFX908-NEXT:    ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
 ; GFX908-NEXT:    s_mov_b32 s21, s15
-; GFX908-NEXT:    v_writelane_b32 v2, s6, 0
-; GFX908-NEXT:    v_writelane_b32 v2, s7, 1
-; GFX908-NEXT:    v_writelane_b32 v2, s21, 2
+; GFX908-NEXT:    v_writelane_b32 v39, s6, 0
+; GFX908-NEXT:    v_writelane_b32 v39, s7, 1
+; GFX908-NEXT:    v_writelane_b32 v39, s21, 2
 ; GFX908-NEXT:    s_mov_b32 s22, s14
-; GFX908-NEXT:    v_writelane_b32 v2, s22, 3
+; GFX908-NEXT:    v_writelane_b32 v39, s22, 3
 ; GFX908-NEXT:    s_mov_b32 s23, s13
-; GFX908-NEXT:    v_writelane_b32 v2, s23, 4
+; GFX908-NEXT:    v_writelane_b32 v39, s23, 4
 ; GFX908-NEXT:    s_mov_b32 s24, s12
-; GFX908-NEXT:    v_writelane_b32 v2, s24, 5
+; GFX908-NEXT:    v_writelane_b32 v39, s24, 5
 ; GFX908-NEXT:    s_mov_b64 s[26:27], s[10:11]
-; GFX908-NEXT:    v_writelane_b32 v2, s26, 6
-; GFX908-NEXT:    v_writelane_b32 v2, s27, 7
-; GFX908-NEXT:    v_writelane_b32 v2, s8, 8
-; GFX908-NEXT:    v_writelane_b32 v2, s9, 9
-; GFX908-NEXT:    v_writelane_b32 v2, s4, 10
+; GFX908-NEXT:    v_writelane_b32 v39, s26, 6
+; GFX908-NEXT:    v_writelane_b32 v39, s27, 7
+; GFX908-NEXT:    v_writelane_b32 v39, s8, 8
+; GFX908-NEXT:    v_writelane_b32 v39, s9, 9
+; GFX908-NEXT:    v_writelane_b32 v39, s4, 10
 ; GFX908-NEXT:    v_mov_b32_e32 v32, v31
-; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_writelane_b32 v2, s5, 11
+; GFX908-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_writelane_b32 v39, s5, 11
 ; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GFX908-NEXT:    v_mov_b32_e32 v33, v2
 ; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def v[0:31]
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
-; GFX908-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v7, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v8, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v10, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v11, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v12, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v13, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v14, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v15, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v16, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v17, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v18, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v19, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v20, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v21, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v22, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v23, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v24, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v25, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v26, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v27, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v28, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v29, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v30, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
+; GFX908-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def v40
 ; GFX908-NEXT:    ;;#ASMEND
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s11
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GFX908-NEXT:    v_mov_b32_e32 v40, v33
-; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX908-NEXT:    v_writelane_b32 v40, s11, 12
+; GFX908-NEXT:    v_writelane_b32 v39, s11, 12
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s12
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_writelane_b32 v40, s12, 13
+; GFX908-NEXT:    v_writelane_b32 v39, s12, 13
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s13
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_writelane_b32 v40, s13, 14
+; GFX908-NEXT:    v_writelane_b32 v39, s13, 14
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s14
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_writelane_b32 v40, s14, 15
+; GFX908-NEXT:    v_writelane_b32 v39, s14, 15
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s15
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_writelane_b32 v40, s15, 16
+; GFX908-NEXT:    v_writelane_b32 v39, s15, 16
 ; GFX908-NEXT:    s_getpc_b64 s[10:11]
 ; GFX908-NEXT:    s_add_u32 s10, s10, foo@gotpcrel32@lo+4
 ; GFX908-NEXT:    s_addc_u32 s11, s11, foo@gotpcrel32@hi+12
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s16
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_writelane_b32 v40, s16, 17
+; GFX908-NEXT:    v_writelane_b32 v39, s16, 17
 ; GFX908-NEXT:    s_load_dwordx2 s[10:11], s[10:11], 0x0
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s17
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_writelane_b32 v40, s17, 18
+; GFX908-NEXT:    v_writelane_b32 v39, s17, 18
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s18
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_writelane_b32 v40, s18, 19
+; GFX908-NEXT:    v_writelane_b32 v39, s18, 19
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s19
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_writelane_b32 v40, s19, 20
+; GFX908-NEXT:    v_writelane_b32 v39, s19, 20
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s20
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_writelane_b32 v40, s20, 21
+; GFX908-NEXT:    v_writelane_b32 v39, s20, 21
 ; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX908-NEXT:    v_writelane_b32 v40, s10, 22
-; GFX908-NEXT:    v_writelane_b32 v40, s11, 23
+; GFX908-NEXT:    v_writelane_b32 v39, s10, 22
+; GFX908-NEXT:    v_writelane_b32 v39, s11, 23
 ; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
 ; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX908-NEXT:    v_readlane_b32 s16, v40, 22
+; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
+; GFX908-NEXT:    v_readlane_b32 s16, v39, 22
 ; GFX908-NEXT:    s_mov_b32 s12, s24
 ; GFX908-NEXT:    s_mov_b32 s13, s23
 ; GFX908-NEXT:    s_mov_b32 s14, s22
 ; GFX908-NEXT:    v_mov_b32_e32 v31, v32
 ; GFX908-NEXT:    s_mov_b32 s15, s21
 ; GFX908-NEXT:    s_mov_b64 s[10:11], s[26:27]
-; GFX908-NEXT:    v_readlane_b32 s17, v40, 23
-; GFX908-NEXT:    buffer_store_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_readlane_b32 s17, v39, 23
+; GFX908-NEXT:    v_mov_b32_e32 v40, v32
 ; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX908-NEXT:    v_readlane_b32 s11, v40, 12
+; GFX908-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-NEXT:    v_readlane_b32 s11, v39, 12
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s11
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s12, v40, 13
+; GFX908-NEXT:    v_readlane_b32 s12, v39, 13
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s12
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s13, v40, 14
+; GFX908-NEXT:    v_readlane_b32 s13, v39, 14
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s13
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s14, v40, 15
+; GFX908-NEXT:    v_readlane_b32 s14, v39, 15
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s14
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s15, v40, 16
+; GFX908-NEXT:    v_readlane_b32 s15, v39, 16
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s15
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s16, v40, 17
+; GFX908-NEXT:    v_readlane_b32 s16, v39, 17
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s16
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s17, v40, 18
+; GFX908-NEXT:    v_readlane_b32 s17, v39, 18
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s17
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s18, v40, 19
+; GFX908-NEXT:    v_readlane_b32 s18, v39, 19
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s18
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s19, v40, 20
+; GFX908-NEXT:    v_readlane_b32 s19, v39, 20
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s19
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s20, v40, 21
+; GFX908-NEXT:    v_readlane_b32 s20, v39, 21
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s20
 ; GFX908-NEXT:    ;;#ASMEND
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s21
 ; GFX908-NEXT:    ;;#ASMEND
+; GFX908-NEXT:    v_writelane_b32 v39, s21, 24
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s22
 ; GFX908-NEXT:    ;;#ASMEND
+; GFX908-NEXT:    v_writelane_b32 v39, s22, 25
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s23
 ; GFX908-NEXT:    ;;#ASMEND
+; GFX908-NEXT:    v_writelane_b32 v39, s23, 26
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s24
 ; GFX908-NEXT:    ;;#ASMEND
+; GFX908-NEXT:    v_writelane_b32 v39, s24, 27
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s25
 ; GFX908-NEXT:    ;;#ASMEND
+; GFX908-NEXT:    v_writelane_b32 v39, s25, 28
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s26
 ; GFX908-NEXT:    ;;#ASMEND
+; GFX908-NEXT:    v_writelane_b32 v39, s26, 29
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s27
 ; GFX908-NEXT:    ;;#ASMEND
+; GFX908-NEXT:    v_writelane_b32 v39, s27, 30
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s28
 ; GFX908-NEXT:    ;;#ASMEND
+; GFX908-NEXT:    v_writelane_b32 v39, s28, 31
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def s29
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GFX908-NEXT:    v_writelane_b32 v40, s21, 24
-; GFX908-NEXT:    v_writelane_b32 v40, s22, 25
-; GFX908-NEXT:    v_writelane_b32 v40, s23, 26
-; GFX908-NEXT:    v_writelane_b32 v40, s24, 27
-; GFX908-NEXT:    v_writelane_b32 v40, s25, 28
-; GFX908-NEXT:    v_writelane_b32 v40, s26, 29
-; GFX908-NEXT:    v_writelane_b32 v40, s27, 30
-; GFX908-NEXT:    v_writelane_b32 v40, s28, 31
-; GFX908-NEXT:    v_writelane_b32 v40, s29, 32
-; GFX908-NEXT:    v_readlane_b32 s4, v40, 10
-; GFX908-NEXT:    v_readlane_b32 s6, v40, 0
-; GFX908-NEXT:    v_readlane_b32 s8, v40, 8
-; GFX908-NEXT:    v_readlane_b32 s10, v40, 6
-; GFX908-NEXT:    v_readlane_b32 s16, v40, 22
-; GFX908-NEXT:    v_readlane_b32 s12, v40, 5
-; GFX908-NEXT:    v_readlane_b32 s13, v40, 4
-; GFX908-NEXT:    v_readlane_b32 s14, v40, 3
-; GFX908-NEXT:    v_readlane_b32 s15, v40, 2
-; GFX908-NEXT:    v_readlane_b32 s5, v40, 11
-; GFX908-NEXT:    v_readlane_b32 s7, v40, 1
-; GFX908-NEXT:    v_readlane_b32 s9, v40, 9
-; GFX908-NEXT:    v_readlane_b32 s11, v40, 7
-; GFX908-NEXT:    v_readlane_b32 s17, v40, 23
+; GFX908-NEXT:    v_writelane_b32 v39, s29, 32
 ; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
 ; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
+; GFX908-NEXT:    v_readlane_b32 s4, v39, 10
+; GFX908-NEXT:    v_readlane_b32 s6, v39, 0
+; GFX908-NEXT:    v_readlane_b32 s8, v39, 8
+; GFX908-NEXT:    v_readlane_b32 s10, v39, 6
+; GFX908-NEXT:    v_readlane_b32 s16, v39, 22
+; GFX908-NEXT:    v_readlane_b32 s12, v39, 5
+; GFX908-NEXT:    v_mov_b32_e32 v31, v40
+; GFX908-NEXT:    v_readlane_b32 s13, v39, 4
+; GFX908-NEXT:    v_readlane_b32 s14, v39, 3
+; GFX908-NEXT:    v_readlane_b32 s15, v39, 2
+; GFX908-NEXT:    v_readlane_b32 s5, v39, 11
+; GFX908-NEXT:    v_readlane_b32 s7, v39, 1
+; GFX908-NEXT:    v_readlane_b32 s9, v39, 9
+; GFX908-NEXT:    v_readlane_b32 s11, v39, 7
+; GFX908-NEXT:    v_readlane_b32 s17, v39, 23
 ; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX908-NEXT:    v_readlane_b32 s21, v40, 24
+; GFX908-NEXT:    s_waitcnt vmcnt(0)
+; GFX908-NEXT:    v_readlane_b32 s4, v39, 10
+; GFX908-NEXT:    v_readlane_b32 s6, v39, 0
+; GFX908-NEXT:    v_readlane_b32 s8, v39, 8
+; GFX908-NEXT:    v_readlane_b32 s10, v39, 6
+; GFX908-NEXT:    v_readlane_b32 s16, v39, 22
+; GFX908-NEXT:    v_readlane_b32 s5, v39, 11
+; GFX908-NEXT:    v_readlane_b32 s7, v39, 1
+; GFX908-NEXT:    v_readlane_b32 s9, v39, 9
+; GFX908-NEXT:    v_readlane_b32 s11, v39, 7
+; GFX908-NEXT:    v_readlane_b32 s12, v39, 5
+; GFX908-NEXT:    v_readlane_b32 s13, v39, 4
+; GFX908-NEXT:    v_readlane_b32 s14, v39, 3
+; GFX908-NEXT:    v_readlane_b32 s15, v39, 2
+; GFX908-NEXT:    v_mov_b32_e32 v31, v40
+; GFX908-NEXT:    v_readlane_b32 s17, v39, 23
+; GFX908-NEXT:    v_readlane_b32 s21, v39, 24
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s21
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s22, v40, 25
+; GFX908-NEXT:    v_readlane_b32 s22, v39, 25
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s22
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s23, v40, 26
+; GFX908-NEXT:    v_readlane_b32 s23, v39, 26
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s23
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s24, v40, 27
+; GFX908-NEXT:    v_readlane_b32 s24, v39, 27
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s24
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s25, v40, 28
+; GFX908-NEXT:    v_readlane_b32 s25, v39, 28
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s25
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s26, v40, 29
+; GFX908-NEXT:    v_readlane_b32 s26, v39, 29
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s26
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s27, v40, 30
+; GFX908-NEXT:    v_readlane_b32 s27, v39, 30
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s27
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s28, v40, 31
+; GFX908-NEXT:    v_readlane_b32 s28, v39, 31
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s28
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_readlane_b32 s29, v40, 32
+; GFX908-NEXT:    v_readlane_b32 s29, v39, 32
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use s29
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GFX908-NEXT:    v_readlane_b32 s4, v40, 10
-; GFX908-NEXT:    v_readlane_b32 s6, v40, 0
-; GFX908-NEXT:    v_readlane_b32 s8, v40, 8
-; GFX908-NEXT:    v_readlane_b32 s10, v40, 6
-; GFX908-NEXT:    v_readlane_b32 s16, v40, 22
-; GFX908-NEXT:    v_readlane_b32 s5, v40, 11
-; GFX908-NEXT:    v_readlane_b32 s7, v40, 1
-; GFX908-NEXT:    v_readlane_b32 s9, v40, 9
-; GFX908-NEXT:    v_readlane_b32 s11, v40, 7
-; GFX908-NEXT:    v_readlane_b32 s12, v40, 5
-; GFX908-NEXT:    v_readlane_b32 s13, v40, 4
-; GFX908-NEXT:    v_readlane_b32 s14, v40, 3
-; GFX908-NEXT:    v_readlane_b32 s15, v40, 2
-; GFX908-NEXT:    v_readlane_b32 s17, v40, 23
-; GFX908-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GFX908-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v10, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v11, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v12, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v13, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v14, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v15, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v16, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v17, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v18, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v19, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v20, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v21, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v22, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v23, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v24, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v25, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v26, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v27, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v28, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v29, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    flat_store_dwordx4 v[0:1], v[30:33] offset:112
@@ -737,37 +736,34 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
 ; GFX908-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    s_mov_b64 exec, 1
-; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:172
-; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:168
+; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    v_readlane_b32 s31, v0, 0
-; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:172
+; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX908-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX908-NEXT:    s_mov_b64 exec, 1
-; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:172
-; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:168
+; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    v_readlane_b32 s30, v0, 0
-; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:172
+; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload
-; GFX908-NEXT:    ; kill: killed $vgpr40
+; GFX908-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    v_readfirstlane_b32 s34, v0
-; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    v_readfirstlane_b32 s35, v0
 ; GFX908-NEXT:    s_xor_saveexec_b64 s[6:7], -1
-; GFX908-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
-; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
-; GFX908-NEXT:    s_mov_b64 exec, -1
-; GFX908-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_mov_b64 exec, s[6:7]
 ; GFX908-NEXT:    s_addk_i32 s32, 0xd400
 ; GFX908-NEXT:    s_mov_b32 s33, s4
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
index 447a8bf9956f3..fe01728c00563 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
@@ -1,5 +1,5 @@
-# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
-# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s
+# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
+# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s
 
 # FIXME: We should not produce a verifier error after erroring
 
diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
index b068d87c4d6f4..809e9f7f4d520 100644
--- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
@@ -242,348 +242,351 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0:       ; %bb.0: ; %_udiv-special-cases
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    s_nop 0
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v3
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    s_nop 0
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v1
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v7
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v3
+; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v2
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 63
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v9
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v8
-; GFX9-O0-NEXT:    v_ashrrev_i64 v[11:12], s4, v[10:11]
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
+; GFX9-O0-NEXT:    v_ashrrev_i64 v[10:11], s4, v[9:10]
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v12
+; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
-; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v10
+; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v7
+; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v6
-; GFX9-O0-NEXT:    v_ashrrev_i64 v[15:16], s4, v[13:14]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v12
-; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v10
-; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v11
-; GFX9-O0-NEXT:    v_xor_b32_e64 v13, v8, v12
-; GFX9-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v10
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_xor_b32_e64 v1, v1, v12
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v16
-; GFX9-O0-NEXT:    v_xor_b32_e64 v9, v8, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
+; GFX9-O0-NEXT:    v_ashrrev_i64 v[14:15], s4, v[12:13]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v11
+; GFX9-O0-NEXT:    v_xor_b32_e64 v2, v2, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v10
+; GFX9-O0-NEXT:    v_xor_b32_e64 v12, v7, v11
+; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
+; GFX9-O0-NEXT:    v_xor_b32_e64 v2, v2, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_xor_b32_e64 v0, v0, v11
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v15
-; GFX9-O0-NEXT:    v_xor_b32_e64 v7, v7, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v5
-; GFX9-O0-NEXT:    v_xor_b32_e64 v9, v9, v3
-; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
-; GFX9-O0-NEXT:    v_xor_b32_e64 v4, v4, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
-; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 killed $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v14
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v9, vcc, v9, v12
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v10, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v13, vcc, v11, v12, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v10, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v15
+; GFX9-O0-NEXT:    v_xor_b32_e64 v8, v7, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v14
+; GFX9-O0-NEXT:    v_xor_b32_e64 v6, v6, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v4
+; GFX9-O0-NEXT:    v_xor_b32_e64 v8, v8, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v0
+; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v13
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v8, vcc, v8, v11
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v9, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v12, vcc, v10, v11, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v9, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v1, vcc, v1, v6
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v3, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v11, vcc, v5, v6, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v2, v3, vcc
+; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v5
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v2, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v4, v5, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v2, vcc, v1, v2, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v4
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v3
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-O0-NEXT:    v_or_b32_e64 v3, v8, v7
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT:    v_or_b32_e64 v1, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
+; GFX9-O0-NEXT:    v_or_b32_e64 v2, v7, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT:    v_or_b32_e64 v0, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s6, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s7, 1
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[1:2], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT:    v_or_b32_e64 v15, v4, v2
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 0
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 1
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v13
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v9, v3, v1
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v15
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], s[6:7]
+; GFX9-O0-NEXT:    v_or_b32_e64 v14, v3, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v8, v2, v0
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s9, 32
-; GFX9-O0-NEXT:    v_add_u32_e64 v6, v6, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
-; GFX9-O0-NEXT:    v_min_u32_e64 v6, v6, v7
+; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT:    v_min_u32_e64 v5, v5, v6
 ; GFX9-O0-NEXT:    s_mov_b32 s8, 0
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v7
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v5
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v8, v8
-; GFX9-O0-NEXT:    v_min_u32_e64 v15, v5, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v4
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
+; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
 ; GFX9-O0-NEXT:    s_mov_b64 s[10:11], 64
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
 ; GFX9-O0-NEXT:    s_mov_b32 s12, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s11
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v8, s[12:13], v8, s12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s14
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v5, s[12:13], v5, v9, s[12:13]
-; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v7, s[12:13], v7, s12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s14
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13]
+; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v10, s[12:13]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, v6, v7, s[12:13]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[12:13]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v5, v6, s[12:13]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v0
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
 ; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v1
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v2
-; GFX9-O0-NEXT:    v_min_u32_e64 v6, v5, v6
+; GFX9-O0-NEXT:    v_min_u32_e64 v5, v4, v5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v3
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v11, v4
-; GFX9-O0-NEXT:    v_min_u32_e64 v15, v5, v11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v2
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v10, v3
+; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v10
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v14
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
 ; GFX9-O0-NEXT:    s_mov_b32 s10, s11
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v11, s[8:9], v11, s8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s10
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v5, s[8:9], v5, v12, s[8:9]
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v10, s[8:9], v10, s8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s10
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9]
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v11
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[8:9]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
 ; GFX9-O0-NEXT:    s_mov_b32 s10, s6
 ; GFX9-O0-NEXT:    s_mov_b32 s11, s7
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, v5, v8
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v9, vcc, v6, v7, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s10
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v4, vcc, v4, v7
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s10
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v6, v7, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s10
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s11
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v6, v7, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v8
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v6
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
 ; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[12:13]
-; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[14:15]
+; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[14:15]
 ; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v7, v10, s[8:9]
-; GFX9-O0-NEXT:    v_and_b32_e64 v7, 1, v7
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v7, 1
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[8:9]
+; GFX9-O0-NEXT:    v_and_b32_e64 v6, 1, v6
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v6, 1
 ; GFX9-O0-NEXT:    s_or_b64 s[8:9], s[4:5], s[8:9]
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX9-O0-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s13
-; GFX9-O0-NEXT:    v_xor_b32_e64 v7, v7, s14
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_xor_b32_e64 v6, v6, s14
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
-; GFX9-O0-NEXT:    v_xor_b32_e64 v5, v5, s12
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v7, v7, v10
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[5:6], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v2, v5, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s10
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[8:9]
+; GFX9-O0-NEXT:    v_xor_b32_e64 v4, v4, s12
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v1, v4, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v4, v5, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s10
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[8:9]
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v3, v4, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
 ; GFX9-O0-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s4, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s5, 3
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 3
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
@@ -591,64 +594,64 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB0_8
 ; GFX9-O0-NEXT:  .LBB0_1: ; %Flow
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v0, 4
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v0, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 4
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 5
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:  ; %bb.2: ; %Flow
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(6)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_branch .LBB0_5
 ; GFX9-O0-NEXT:  .LBB0_3: ; %Flow2
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v4, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v4, 3
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 3
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_branch .LBB0_9
 ; GFX9-O0-NEXT:  .LBB0_4: ; %udiv-loop-exit
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 1
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(2)
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s4, v[0:1]
@@ -682,116 +685,116 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB0_3
 ; GFX9-O0-NEXT:  .LBB0_5: ; %Flow1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v8, 6
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v8, 7
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 6
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 7
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_branch .LBB0_4
 ; GFX9-O0-NEXT:  .LBB0_6: ; %udiv-do-while
 ; GFX9-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s6, v16, 8
-; GFX9-O0-NEXT:    v_readlane_b32 s7, v16, 9
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    v_readlane_b32 s6, v30, 8
+; GFX9-O0-NEXT:    v_readlane_b32 s7, v30, 9
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 63
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(16)
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[29:30], s4, v[2:3]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v30
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[28:29], s4, v[2:3]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v29
 ; GFX9-O0-NEXT:    s_mov_b32 s5, 1
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[23:24], s5, v[23:24]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v24
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], s5, v[22:23]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
 ; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v29
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
-; GFX9-O0-NEXT:    v_or_b32_e64 v23, v5, v10
-; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[29:30], s5, v[2:3]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v22
+; GFX9-O0-NEXT:    v_or_b32_e64 v22, v5, v10
+; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v4
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[2:3]
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], s4, v[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v30
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v29
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
 ; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v29
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v28
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_or_b32_e64 v4, v3, v4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s5, v[0:1]
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[29:30], s5, v[6:7]
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[6:7]
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v30
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v29
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(10)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v27
 ; GFX9-O0-NEXT:    v_or3_b32 v6, v6, v7, v10
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v29
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v27
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v26
 ; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v26
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v25
 ; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v25
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v24
 ; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v14
@@ -816,22 +819,22 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, 0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v22
-; GFX9-O0-NEXT:    v_and_b32_e64 v23, v7, v23
-; GFX9-O0-NEXT:    v_and_b32_e64 v21, v11, v21
-; GFX9-O0-NEXT:    ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v20
-; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v23
-; GFX9-O0-NEXT:    v_and_b32_e64 v23, v11, v19
-; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v24
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v20
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v19, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v21
+; GFX9-O0-NEXT:    v_and_b32_e64 v22, v7, v22
+; GFX9-O0-NEXT:    v_and_b32_e64 v20, v11, v20
+; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v19
+; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v22
+; GFX9-O0-NEXT:    v_and_b32_e64 v22, v11, v18
+; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v19
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
@@ -847,149 +850,149 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v20, vcc, v11, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v19, vcc, v11, v16
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s4
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v17, vcc, v10, v11, vcc
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s4
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v9
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v20
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v21
-; GFX9-O0-NEXT:    v_or_b32_e64 v19, v19, v22
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v20
-; GFX9-O0-NEXT:    v_or_b32_e64 v17, v17, v18
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v19
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13]
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v19
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v20
+; GFX9-O0-NEXT:    v_or_b32_e64 v18, v18, v21
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v19
+; GFX9-O0-NEXT:    v_or_b32_e64 v16, v16, v17
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v18
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
 ; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v2
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v0
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v2
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v15
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v14
 ; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v0
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v12
 ; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v14
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v13
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v12
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s6, 4
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s7, 5
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 5
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s6, 8
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s7, 9
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 8
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 9
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_andn2_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:    s_cbranch_execnz .LBB0_6
 ; GFX9-O0-NEXT:    s_branch .LBB0_1
 ; GFX9-O0-NEXT:  .LBB0_7: ; %udiv-preheader
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(9)
+; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[21:22]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[20:21]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
 ; GFX9-O0-NEXT:    s_mov_b32 s6, 64
 ; GFX9-O0-NEXT:    v_sub_u32_e64 v12, s6, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[23:24], v12, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v24
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], v12, v[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v23
 ; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
 ; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
 ; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v4, s6
 ; GFX9-O0-NEXT:    v_sub_u32_e64 v5, v4, s6
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[23:24], v5, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[22:23], v5, v[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b32 s6, 0
 ; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v4, s6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v21
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[6:7]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v22
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v20
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[19:20]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[18:19]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v5
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s7
@@ -1008,12 +1011,12 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s4
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v17, vcc, v15, v17, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s4
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s5
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s4
@@ -1025,427 +1028,424 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v16
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[6:7]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s9
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, s7
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s4, 8
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s5, 9
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 8
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 9
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_branch .LBB0_6
 ; GFX9-O0-NEXT:  .LBB0_8: ; %udiv-bb1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s6
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s7
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s6
 ; GFX9-O0-NEXT:    s_mov_b32 s9, s7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v9, vcc, v4, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s4
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s9
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v8, vcc, v3, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s4
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 0x7f
-; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s4, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[5:6], v3, v[11:12]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v6
+; GFX9-O0-NEXT:    v_sub_u32_e64 v2, s4, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[10:11]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 64
-; GFX9-O0-NEXT:    v_sub_u32_e64 v14, s4, v3
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[14:15], v14, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v15
-; GFX9-O0-NEXT:    v_or_b32_e64 v13, v13, v16
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v14
-; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v6
-; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v3, s4
+; GFX9-O0-NEXT:    v_sub_u32_e64 v13, s4, v2
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[13:14], v13, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
+; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v15
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
+; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v5
+; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v2, s4
 ; GFX9-O0-NEXT:    s_mov_b32 s10, 63
-; GFX9-O0-NEXT:    v_sub_u32_e64 v4, s10, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[13:14], v4, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v15, s[4:5]
+; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s10, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[12:13], v3, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b32 s10, 0
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v3, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v15, s[10:11]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[10:11]
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v2, s10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[10:11]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[10:11]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[7:8], v3, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v4, v7, s[4:5]
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[6:7], v2, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s9
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v3, v6, s[4:5]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v3
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
-; GFX9-O0-NEXT:    v_or_b32_e64 v3, v3, v4
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v1, v1, v2
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7]
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
+; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s7
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s7
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
 ; GFX9-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s6, 6
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s7, 7
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 6
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 7
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-O0-NEXT:    s_cbranch_execz .LBB0_5
 ; GFX9-O0-NEXT:    s_branch .LBB0_7
 ; GFX9-O0-NEXT:  .LBB0_9: ; %udiv-end
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 32
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[5:6]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[16:17]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v20
+; GFX9-O0-NEXT:    v_mul_lo_u32 v8, v1, v0
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[20:21], s4, v[20:21]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v16
+; GFX9-O0-NEXT:    v_mul_lo_u32 v5, v2, v5
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[16:17], s[6:7], v2, v0, 0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v17
-; GFX9-O0-NEXT:    v_mul_lo_u32 v3, v1, v0
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[17:18], s4, v[17:18]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v17
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mul_lo_u32 v2, v5, v2
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[17:18], s[6:7], v5, v0, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v18
-; GFX9-O0-NEXT:    v_add3_u32 v2, v0, v2, v3
+; GFX9-O0-NEXT:    v_add3_u32 v8, v0, v5, v8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s4, v[2:3]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v3
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 killed $vgpr17_vgpr18 killed $exec
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v0
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[8:9], s4, v[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 killed $vgpr16_vgpr17 killed $exec
 ; GFX9-O0-NEXT:    s_mov_b32 s5, 0
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v0
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v17
+; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT:    v_or_b32_e64 v16, v5, v8
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v0
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[8:9], s4, v[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v14
+; GFX9-O0-NEXT:    v_mul_lo_u32 v9, v8, v5
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v18
-; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v17
-; GFX9-O0-NEXT:    v_or_b32_e64 v17, v2, v3
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v0
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[2:3], s4, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v11
-; GFX9-O0-NEXT:    v_mul_lo_u32 v3, v2, v6
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[11:12], s4, v[11:12]
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v19
-; GFX9-O0-NEXT:    v_mul_lo_u32 v11, v11, v0
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[19:20], s[6:7], v2, v0, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v20
-; GFX9-O0-NEXT:    v_add3_u32 v2, v2, v3, v11
+; GFX9-O0-NEXT:    v_mul_lo_u32 v14, v14, v0
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[18:19], s[6:7], v8, v0, 0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v19
+; GFX9-O0-NEXT:    v_add3_u32 v8, v8, v9, v14
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s6
-; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s4, v[2:3]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v3
-; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 killed $vgpr19_vgpr20 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s6
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[8:9], s4, v[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 killed $vgpr18_vgpr19 killed $exec
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
-; GFX9-O0-NEXT:    v_or_b32_e64 v11, v11, v12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v19
-; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v18
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v17, s[6:7], v11, v12
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7]
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v2
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[19:20], s[6:7], v6, v1, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v19
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s5
+; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v14
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v19
+; GFX9-O0-NEXT:    v_or_b32_e64 v14, v14, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v18
+; GFX9-O0-NEXT:    v_or_b32_e64 v8, v8, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v17
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v16, s[6:7], v14, v15
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v8, s[6:7], v8, v9, s[6:7]
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v8
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v5, v1, 0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v14
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s5
+; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v19
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v12
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[19:20], s4, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v20
-; GFX9-O0-NEXT:    v_or_b32_e64 v11, v11, v12
-; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v19
-; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[19:20], s[6:7], v6, v5, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v19
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s6
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v9
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v15
+; GFX9-O0-NEXT:    v_or_b32_e64 v8, v8, v9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_or_b32_e64 v20, v9, v14
+; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v8
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v5, v2, 0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v14
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s5
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v21, s6
-; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v21
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[19:20], s4, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v20
-; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v21
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v19
-; GFX9-O0-NEXT:    v_or_b32_e64 v23, v11, v12
-; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v6
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[11:12], s[6:7], v0, v5, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, s6
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v18
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v15
+; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v18
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT:    v_or_b32_e64 v22, v8, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v5
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v0, v2, 0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v15
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v21
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v24
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v22
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v5, s[6:7], v5, v20
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v19, s[6:7], v6, v19, s[6:7]
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v19
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s5
+; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v19
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v8, s[6:7], v8, v9
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0xffffffff
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s7
-; GFX9-O0-NEXT:    v_and_b32_e64 v19, v19, s8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v5
+; GFX9-O0-NEXT:    v_and_b32_e64 v2, v2, s8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
 ; GFX9-O0-NEXT:    ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7
-; GFX9-O0-NEXT:    v_and_b32_e64 v21, v20, s6
-; GFX9-O0-NEXT:    ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v19
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[19:20], s[6:7], v0, v1, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v19
+; GFX9-O0-NEXT:    v_and_b32_e64 v18, v5, s6
+; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v2
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[22:23], s[6:7], v0, v1, 0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v22
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v24
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v20
+; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v23
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v1
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[19:20], s4, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v20
-; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v23
-; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 killed $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT:    v_or_b32_e64 v23, v1, v19
-; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v21
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v24
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v22
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v0, s[6:7], v0, v20
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v19, s[6:7], v1, v19, s[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s6
+; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v5
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], s4, v[22:23]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
+; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v22
+; GFX9-O0-NEXT:    v_or_b32_e64 v1, v1, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v18
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v19
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v0, s[6:7], v0, v5
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v1, v2, s[6:7]
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v19
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[21:22], s4, v[0:1]
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[5:6], s4, v[5:6]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v21
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v22
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v19, s[6:7], v19, v20
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v5, s[6:7], v5, v6, s[6:7]
-; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v19
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v20
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v19, s[6:7], v5, v6
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7]
-; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[18:19], s4, v[0:1]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[22:23], s4, v[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v19
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v18, s[6:7], v8, v9
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
+; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v21
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v19
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v18, s[6:7], v8, v9
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
+; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v18
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v16
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v19
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v20
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v18
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v2, s[6:7], v2, v6
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v5, s[6:7], v3, v5, s[6:7]
-; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v17
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v8, s[6:7], v8, v9
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v2
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[0:1], s4, v[0:1]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v1
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
-; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s5
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v15
+; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v5
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v14
 ; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v15
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v16
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v14
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v12
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v11, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v13
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v9
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v8, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v5, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v2, vcc, v1, v2, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
@@ -1453,53 +1453,48 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v7
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT:    v_xor_b32_e64 v9, v6, v5
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v6
+; GFX9-O0-NEXT:    v_xor_b32_e64 v8, v5, v4
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v6
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT:    v_xor_b32_e64 v0, v0, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
+; GFX9-O0-NEXT:    v_xor_b32_e64 v0, v0, v7
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v10
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v7, vcc, v7, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v9
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, v5, v7
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v5, vcc, v3, v5, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v1
+; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[7:8], s4, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[5:6], s4, v[5:6]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
-; GFX9-O0-NEXT:    ; kill: killed $vgpr4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[3:4], s4, v[3:4]
+; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_nop 0
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -1716,272 +1711,265 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0:       ; %bb.0: ; %_udiv-special-cases
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v6
 ; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v0
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v3
+; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v3
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-O0-NEXT:    v_or_b32_e64 v3, v8, v7
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT:    v_or_b32_e64 v1, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
+; GFX9-O0-NEXT:    v_or_b32_e64 v2, v7, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT:    v_or_b32_e64 v0, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s6, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s7, 1
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[1:2], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT:    v_or_b32_e64 v15, v4, v2
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 0
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 1
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v13
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v9, v3, v1
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v15
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], s[6:7]
+; GFX9-O0-NEXT:    v_or_b32_e64 v14, v3, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v8, v2, v0
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
 ; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s9, 32
-; GFX9-O0-NEXT:    v_add_u32_e64 v6, v6, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
-; GFX9-O0-NEXT:    v_min_u32_e64 v6, v6, v7
+; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT:    v_min_u32_e64 v5, v5, v6
 ; GFX9-O0-NEXT:    s_mov_b32 s8, 0
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v7
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v5
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v8, v8
-; GFX9-O0-NEXT:    v_min_u32_e64 v15, v5, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v4
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
+; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
 ; GFX9-O0-NEXT:    s_mov_b64 s[10:11], 64
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
 ; GFX9-O0-NEXT:    s_mov_b32 s12, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s11
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v8, s[12:13], v8, s12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s14
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v5, s[12:13], v5, v9, s[12:13]
-; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v7, s[12:13], v7, s12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s14
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13]
+; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v10, s[12:13]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, v6, v7, s[12:13]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[12:13]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v5, v6, s[12:13]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v0
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
 ; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v1
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v2
-; GFX9-O0-NEXT:    v_min_u32_e64 v6, v5, v6
+; GFX9-O0-NEXT:    v_min_u32_e64 v5, v4, v5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v3
-; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT:    v_ffbh_u32_e64 v11, v4
-; GFX9-O0-NEXT:    v_min_u32_e64 v15, v5, v11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v2
+; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT:    v_ffbh_u32_e64 v10, v3
+; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v10
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v14
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
 ; GFX9-O0-NEXT:    s_mov_b32 s10, s11
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v11, s[8:9], v11, s8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s10
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v5, s[8:9], v5, v12, s[8:9]
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v10, s[8:9], v10, s8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s10
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9]
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v11
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[8:9]
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
 ; GFX9-O0-NEXT:    s_mov_b32 s10, s6
 ; GFX9-O0-NEXT:    s_mov_b32 s11, s7
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, v5, v8
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v9, vcc, v6, v7, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s10
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v4, vcc, v4, v7
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s10
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v6, v7, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s10
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s11
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v6, v7, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v8
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v7
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v6
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v4
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
 ; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[12:13]
-; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[14:15]
+; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[14:15]
 ; GFX9-O0-NEXT:    s_mov_b64 s[14:15], s[6:7]
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[14:15]
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v7, v10, s[8:9]
-; GFX9-O0-NEXT:    v_and_b32_e64 v7, 1, v7
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v7, 1
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[14:15]
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[8:9]
+; GFX9-O0-NEXT:    v_and_b32_e64 v6, 1, v6
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v6, 1
 ; GFX9-O0-NEXT:    s_or_b64 s[8:9], s[4:5], s[8:9]
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX9-O0-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s13
-; GFX9-O0-NEXT:    v_xor_b32_e64 v7, v7, s14
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_xor_b32_e64 v6, v6, s14
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
-; GFX9-O0-NEXT:    v_xor_b32_e64 v5, v5, s12
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v7, v7, v10
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[5:6], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v2, v5, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s10
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[8:9]
+; GFX9-O0-NEXT:    v_xor_b32_e64 v4, v4, s12
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v1, v4, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v4, v5, s[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s10
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[8:9]
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v3, v4, s[8:9]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
 ; GFX9-O0-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s4, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s5, 3
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 3
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
@@ -1989,11 +1977,11 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB1_8
 ; GFX9-O0-NEXT:  .LBB1_1: ; %Flow
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v0, 4
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v0, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 4
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 5
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:  ; %bb.2: ; %Flow
 ; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
@@ -2020,11 +2008,11 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB1_5
 ; GFX9-O0-NEXT:  .LBB1_3: ; %Flow2
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v4, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v4, 3
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 3
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
@@ -2080,11 +2068,11 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB1_3
 ; GFX9-O0-NEXT:  .LBB1_5: ; %Flow1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v8, 6
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v8, 7
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 6
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 7
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
@@ -2111,85 +2099,85 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:  .LBB1_6: ; %udiv-do-while
 ; GFX9-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s6, v16, 8
-; GFX9-O0-NEXT:    v_readlane_b32 s7, v16, 9
+; GFX9-O0-NEXT:    v_readlane_b32 s6, v30, 8
+; GFX9-O0-NEXT:    v_readlane_b32 s7, v30, 9
 ; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 63
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(16)
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[29:30], s4, v[2:3]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v30
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[28:29], s4, v[2:3]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v29
 ; GFX9-O0-NEXT:    s_mov_b32 s5, 1
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[23:24], s5, v[23:24]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v24
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], s5, v[22:23]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
 ; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v29
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
-; GFX9-O0-NEXT:    v_or_b32_e64 v23, v5, v10
-; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[29:30], s5, v[2:3]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v22
+; GFX9-O0-NEXT:    v_or_b32_e64 v22, v5, v10
+; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v4
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[2:3]
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], s4, v[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v30
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v29
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
 ; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v29
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v28
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_or_b32_e64 v4, v3, v4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s5, v[0:1]
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[29:30], s5, v[6:7]
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[6:7]
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v30
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v29
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(10)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v27
 ; GFX9-O0-NEXT:    v_or3_b32 v6, v6, v7, v10
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v29
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v27
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v28
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v26
 ; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v26
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v25
 ; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v25
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v24
 ; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v14
@@ -2214,22 +2202,22 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, 0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v22
-; GFX9-O0-NEXT:    v_and_b32_e64 v23, v7, v23
-; GFX9-O0-NEXT:    v_and_b32_e64 v21, v11, v21
-; GFX9-O0-NEXT:    ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v20
-; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v23
-; GFX9-O0-NEXT:    v_and_b32_e64 v23, v11, v19
-; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v23
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v24
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v20
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v19, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v21
+; GFX9-O0-NEXT:    v_and_b32_e64 v22, v7, v22
+; GFX9-O0-NEXT:    v_and_b32_e64 v20, v11, v20
+; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v19
+; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v22
+; GFX9-O0-NEXT:    v_and_b32_e64 v22, v11, v18
+; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v19
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
@@ -2245,66 +2233,66 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v20, vcc, v11, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v19, vcc, v11, v16
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s4
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v17, vcc, v10, v11, vcc
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s4
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v9
+; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v9
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v20
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v21
-; GFX9-O0-NEXT:    v_or_b32_e64 v19, v19, v22
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v20
-; GFX9-O0-NEXT:    v_or_b32_e64 v17, v17, v18
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v19
-; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13]
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v19
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v20
+; GFX9-O0-NEXT:    v_or_b32_e64 v18, v18, v21
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v19
+; GFX9-O0-NEXT:    v_or_b32_e64 v16, v16, v17
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v18
+; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
 ; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v2
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v2
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v0
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v0
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v15
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v14
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v14
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v12
-; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v13
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v12
+; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s6, 4
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s7, 5
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 5
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s6, 8
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s7, 9
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 8
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 9
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -2334,6 +2322,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_cbranch_execnz .LBB1_6
 ; GFX9-O0-NEXT:    s_branch .LBB1_1
 ; GFX9-O0-NEXT:  .LBB1_7: ; %udiv-preheader
+; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
@@ -2342,52 +2333,49 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(9)
+; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[21:22]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[20:21]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
 ; GFX9-O0-NEXT:    s_mov_b32 s6, 64
 ; GFX9-O0-NEXT:    v_sub_u32_e64 v12, s6, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[23:24], v12, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v24
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], v12, v[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v23
 ; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
 ; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
 ; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v4, s6
 ; GFX9-O0-NEXT:    v_sub_u32_e64 v5, v4, s6
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[23:24], v5, v[19:20]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[22:23], v5, v[18:19]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b32 s6, 0
 ; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v4, s6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v21
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[6:7]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v22
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v20
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[19:20]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[18:19]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v5
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s7
@@ -2406,12 +2394,12 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s4
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v17, vcc, v15, v17, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s4
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s5
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s4
@@ -2423,7 +2411,7 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v16
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -2436,10 +2424,10 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, s7
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s4, 8
-; GFX9-O0-NEXT:    v_writelane_b32 v16, s5, 9
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 8
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 9
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
@@ -2468,400 +2456,392 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_branch .LBB1_6
 ; GFX9-O0-NEXT:  .LBB1_8: ; %udiv-bb1
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s6
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s7
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s6
 ; GFX9-O0-NEXT:    s_mov_b32 s9, s7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT:    v_add_co_u32_e32 v9, vcc, v4, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s4
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s9
-; GFX9-O0-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT:    v_add_co_u32_e32 v8, vcc, v3, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s4
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
+; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 0x7f
-; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s4, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[5:6], v3, v[11:12]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v6
+; GFX9-O0-NEXT:    v_sub_u32_e64 v2, s4, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[10:11]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 64
-; GFX9-O0-NEXT:    v_sub_u32_e64 v14, s4, v3
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[14:15], v14, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v15
-; GFX9-O0-NEXT:    v_or_b32_e64 v13, v13, v16
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v14
-; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v6
-; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v3, s4
+; GFX9-O0-NEXT:    v_sub_u32_e64 v13, s4, v2
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[13:14], v13, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
+; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v15
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
+; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v5
+; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v2, s4
 ; GFX9-O0-NEXT:    s_mov_b32 s10, 63
-; GFX9-O0-NEXT:    v_sub_u32_e64 v4, s10, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[13:14], v4, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v15, s[4:5]
+; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s10, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[12:13], v3, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b32 s10, 0
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v3, s10
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v15, s[10:11]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[10:11]
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v2, s10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v11
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[10:11]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[10:11]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[7:8], v3, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v4, v7, s[4:5]
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[6:7], v2, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s9
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v3, v6, s[4:5]
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v3
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
-; GFX9-O0-NEXT:    v_or_b32_e64 v3, v3, v4
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v9
-; GFX9-O0-NEXT:    v_or_b32_e64 v1, v1, v2
-; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7]
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
+; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v8
+; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
+; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s7
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s7
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
 ; GFX9-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s6, 6
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s7, 7
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 6
+; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 7
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-O0-NEXT:    s_cbranch_execz .LBB1_5
 ; GFX9-O0-NEXT:    s_branch .LBB1_7
 ; GFX9-O0-NEXT:  .LBB1_9: ; %udiv-end
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 32
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[2:3], s4, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[2:3], s4, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
+; GFX9-O0-NEXT:    v_mul_lo_u32 v4, v5, v2
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[12:13], s4, v[12:13]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v12
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mul_lo_u32 v3, v6, v3
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[12:13], s[6:7], v6, v2, 0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v13
-; GFX9-O0-NEXT:    v_mul_lo_u32 v5, v6, v2
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[13:14], s4, v[13:14]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mul_lo_u32 v3, v7, v3
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[13:14], s[6:7], v7, v2, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
-; GFX9-O0-NEXT:    v_add3_u32 v2, v2, v3, v5
+; GFX9-O0-NEXT:    v_add3_u32 v2, v2, v3, v4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[17:18], s4, v[2:3]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v18
-; GFX9-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 killed $vgpr13_vgpr14 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[3:4], s4, v[2:3]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v4
+; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 killed $vgpr12_vgpr13 killed $exec
 ; GFX9-O0-NEXT:    s_mov_b32 s5, 0
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
-; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT:    v_or_b32_e64 v13, v3, v5
-; GFX9-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v2
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[2:3], s4, v[15:16]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v11
-; GFX9-O0-NEXT:    v_mul_lo_u32 v3, v2, v8
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[11:12], s4, v[11:12]
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v15
-; GFX9-O0-NEXT:    v_mul_lo_u32 v11, v11, v5
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[15:16], s[6:7], v2, v5, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v16
-; GFX9-O0-NEXT:    v_add3_u32 v2, v2, v3, v11
+; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v13
+; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v12
+; GFX9-O0-NEXT:    v_or_b32_e64 v12, v3, v4
+; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v2
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[2:3], s4, v[14:15]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
+; GFX9-O0-NEXT:    v_mul_lo_u32 v3, v2, v7
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[10:11], s4, v[10:11]
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
+; GFX9-O0-NEXT:    v_mul_lo_u32 v10, v10, v4
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v2, v4, 0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v15
+; GFX9-O0-NEXT:    v_add3_u32 v2, v2, v3, v10
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s6
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s4, v[2:3]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v3
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 killed $vgpr15_vgpr16 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v3
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v16
-; GFX9-O0-NEXT:    v_or_b32_e64 v11, v11, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s5
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v15
+; GFX9-O0-NEXT:    v_or_b32_e64 v10, v10, v11
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
 ; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v13
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v12
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v14
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v13, s[6:7], v11, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v12, s[6:7], v10, v11
 ; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7]
-; GFX9-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v2
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[15:16], s[6:7], v8, v6, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v15
+; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v2
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v7, v5, 0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v12
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[15:16], s4, v[15:16]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v16
-; GFX9-O0-NEXT:    v_or_b32_e64 v11, v11, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s6
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v11
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v15
+; GFX9-O0-NEXT:    v_or_b32_e64 v10, v10, v11
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v14
 ; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[15:16], s[6:7], v8, v7, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v7, v6, 0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v14
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v12
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s5
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s6
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v17
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[15:16], s4, v[15:16]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v16
-; GFX9-O0-NEXT:    v_or_b32_e64 v8, v8, v17
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v15
-; GFX9-O0-NEXT:    v_or_b32_e64 v19, v11, v12
-; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v8
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[11:12], s[6:7], v5, v7, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s6
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v15
+; GFX9-O0-NEXT:    v_or_b32_e64 v7, v7, v16
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v14
+; GFX9-O0-NEXT:    v_or_b32_e64 v18, v10, v11
+; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v7
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[10:11], s[6:7], v4, v6, 0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v11
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s5
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v18
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v19
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v20
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v18
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v7, s[6:7], v7, v16
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v15, s[6:7], v8, v15, s[6:7]
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v15
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v17
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v6, s[6:7], v6, v15
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v14, s[6:7], v7, v14, s[6:7]
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v7
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0xffffffff
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s7
-; GFX9-O0-NEXT:    v_and_b32_e64 v15, v15, s8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v7
+; GFX9-O0-NEXT:    v_and_b32_e64 v14, v14, s8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v6
 ; GFX9-O0-NEXT:    ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7
-; GFX9-O0-NEXT:    v_and_b32_e64 v17, v16, s6
-; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v15
-; GFX9-O0-NEXT:    v_mad_u64_u32 v[15:16], s[6:7], v5, v6, 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v15
+; GFX9-O0-NEXT:    v_and_b32_e64 v16, v15, s6
+; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v14
+; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v4, v5, 0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v14
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v20
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v19
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s6
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v6
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[15:16], s4, v[15:16]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v16
-; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v19
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 killed $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_or_b32_e64 v19, v6, v15
-; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s6
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v5
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v15
+; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v18
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_or_b32_e64 v18, v5, v14
+; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v18
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v19
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v20
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v18
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v5, s[6:7], v5, v16
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v15, s[6:7], v6, v15, s[6:7]
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v15
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[17:18], s4, v[5:6]
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[7:8], s4, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v17
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v18
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v15, s[6:7], v15, v16
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v7, s[6:7], v7, v8, s[6:7]
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v17
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v4, s[6:7], v4, v15
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v14, s[6:7], v5, v14, s[6:7]
+; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v14
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[16:17], s4, v[4:5]
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], s4, v[6:7]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v17
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v14, s[6:7], v14, v15
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v6, v7, s[6:7]
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v16
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v15, s[6:7], v7, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v15
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v14, s[6:7], v6, v7
 ; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7]
-; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v15
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v13
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v16
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
-; GFX9-O0-NEXT:    v_add_co_u32_e64 v2, s[6:7], v2, v8
-; GFX9-O0-NEXT:    v_addc_co_u32_e64 v7, s[6:7], v3, v7, s[6:7]
+; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v13
+; GFX9-O0-NEXT:    v_add_co_u32_e64 v2, s[6:7], v2, v7
+; GFX9-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v3, v6, s[6:7]
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v7
-; GFX9-O0-NEXT:    v_lshlrev_b64 v[6:7], s4, v[5:6]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
-; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
+; GFX9-O0-NEXT:    v_lshlrev_b64 v[5:6], s4, v[4:5]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
-; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v11
+; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
+; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v10
-; GFX9-O0-NEXT:    v_sub_co_u32_e32 v7, vcc, v7, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v9
+; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, v5, v7
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
-; GFX9-O0-NEXT:    v_subb_co_u32_e32 v5, vcc, v3, v5, vcc
+; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
-; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v1
+; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
-; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v7
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[7:8], s4, v[7:8]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v5
+; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[5:6], s4, v[5:6]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
-; GFX9-O0-NEXT:    ; kill: killed $vgpr4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[3:4], s4, v[3:4]
+; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_nop 0
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index 248a9e2ddb636..4f6ea44ccf68b 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -7,12 +7,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b32_e32
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
@@ -31,16 +31,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_mov_b32_e32_impuse
     ; GCN: $m0 = IMPLICIT_DEF
-    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec, implicit $m0
-    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec, implicit $m0
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
+    ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec, implicit $m0
+    ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec, implicit $m0
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     $m0 = IMPLICIT_DEF
     %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
@@ -59,12 +55,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b32_e32_exec_def
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]]
     ; GCN-NEXT: $exec = S_ANDN2_B64_term $exec, undef renamable $sgpr0_sgpr1, implicit-def $scc
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
@@ -82,12 +78,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b32_e64
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e64 1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 2, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MOV_B32_e64_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 2, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_e64_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e64 2, implicit $exec
@@ -105,16 +101,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_mov_b32_dpp
-    ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp undef [[V_MOV_B32_dpp]], undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp undef [[V_MOV_B32_dpp1]], undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp undef [[V_MOV_B32_dpp2]], undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_dpp]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_dpp1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_dpp2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
@@ -130,12 +122,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_accvgpr_read_b32
-    ; GCN: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ACCVGPR_READ_B32_e64_:%[0-9]+]]:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
+    ; GCN-NEXT: [[V_ACCVGPR_READ_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
+    ; GCN-NEXT: [[V_ACCVGPR_READ_B32_e64_2:%[0-9]+]]:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_READ_B32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_READ_B32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_READ_B32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
     %1:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
@@ -151,12 +143,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_accvgpr_write_b32
-    ; GCN: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
-    ; GCN-NEXT: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr1
-    ; GCN-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+    ; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
+    ; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
+    ; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_WRITE_B32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_WRITE_B32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
     %1:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
@@ -172,12 +164,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b64_pseudo
-    ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_MOV_B64_PSEUDO 2, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 3, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 2, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec
     %1:vreg_64_align2 = V_MOV_B64_PSEUDO 2, implicit $exec
@@ -193,12 +185,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
@@ -216,16 +208,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_fp_except
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
@@ -245,16 +233,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_mode_def
     ; GCN: $mode = IMPLICIT_DEF
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     $mode = IMPLICIT_DEF
     %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
@@ -271,12 +255,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_I32_F64_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
@@ -294,12 +278,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64_undef
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_I32_F64_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %1:vreg_64, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %1:vreg_64, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %1:vreg_64, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
@@ -317,16 +301,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_dpp
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_dpp undef $vgpr1, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_I32_F64_dpp:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_dpp undef [[V_CVT_I32_F64_dpp]], 0, undef %1:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_dpp1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_dpp undef [[V_CVT_I32_F64_dpp1]], 0, undef %1:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F64_dpp2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_dpp undef [[V_CVT_I32_F64_dpp2]], 0, undef %1:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_dpp]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_dpp1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_dpp2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_I32_F64_dpp undef %1:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_I32_F64_dpp undef %2:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
@@ -344,16 +324,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_def
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+    ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+    ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
     %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
@@ -371,16 +347,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_use
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
+    ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
+    ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
     %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
@@ -396,12 +368,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f64_i32_e32
-    ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F64_I32_e32_1:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F64_I32_e32_2:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
@@ -417,12 +389,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_f64_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_F32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
@@ -438,12 +410,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f64_f32_e32
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_CVT_F64_F32_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F64_F32_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F64_F32_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
@@ -459,12 +431,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_u32_f64_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_U32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_U32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_U32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
@@ -480,12 +452,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f64_u32_e32
-    ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_CVT_F64_U32_e32_:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F64_U32_e32_1:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F64_U32_e32_2:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_U32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_U32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_U32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
@@ -501,12 +473,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_i32_e32
-    ; GCN: renamable $vgpr0 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_I32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_I32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
@@ -522,12 +494,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_i32_sdwa
-    ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_F32_I32_sdwa:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_I32_sdwa1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_I32_sdwa2:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
@@ -547,16 +519,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_f32_i32_sdwa_dst_unused_preserve
-    ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0)
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr1(tied-def 0)
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0)
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_F32_I32_sdwa:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef [[V_CVT_F32_I32_sdwa]](tied-def 0)
+    ; GCN-NEXT: [[V_CVT_F32_I32_sdwa1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef [[V_CVT_F32_I32_sdwa1]](tied-def 0)
+    ; GCN-NEXT: [[V_CVT_F32_I32_sdwa2:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef [[V_CVT_F32_I32_sdwa2]](tied-def 0)
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %1:vgpr_32(tied-def 0)
     %2:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %2:vgpr_32(tied-def 0)
@@ -572,12 +540,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_u32_e32
-    ; GCN: renamable $vgpr0 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_U32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_U32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_U32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_U32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_U32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
@@ -593,12 +561,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_u32_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_U32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_U32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
@@ -614,12 +582,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
@@ -635,12 +603,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_f16_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F16_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
@@ -656,12 +624,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_rpi_i32_f32_e32
-    ; GCN: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_RPI_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_RPI_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_RPI_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_RPI_I32_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_RPI_I32_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_RPI_I32_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
@@ -677,12 +645,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_flr_i32_f32_e32
-    ; GCN: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_FLR_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_FLR_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_FLR_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_FLR_I32_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_FLR_I32_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_FLR_I32_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
@@ -698,12 +666,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_off_f32_i4_e32
-    ; GCN: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_OFF_F32_I4_e32_:%[0-9]+]]:vgpr_32 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_OFF_F32_I4_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_OFF_F32_I4_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_OFF_F32_I4_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_OFF_F32_I4_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_OFF_F32_I4_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
@@ -719,12 +687,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_ubyte0_e32
-    ; GCN: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_F32_UBYTE0_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_UBYTE0_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_F32_UBYTE0_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_UBYTE0_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_UBYTE0_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_UBYTE0_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
@@ -740,12 +708,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fract_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FRACT_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FRACT_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FRACT_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
@@ -761,12 +729,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_trunc_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_TRUNC_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_TRUNC_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_TRUNC_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_TRUNC_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_TRUNC_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_TRUNC_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
@@ -782,12 +750,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ceil_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CEIL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CEIL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CEIL_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CEIL_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CEIL_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CEIL_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
@@ -803,12 +771,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rndne_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_RNDNE_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RNDNE_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RNDNE_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RNDNE_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RNDNE_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RNDNE_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
@@ -824,12 +792,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_floor_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FLOOR_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FLOOR_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FLOOR_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FLOOR_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FLOOR_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FLOOR_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
@@ -845,12 +813,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_exp_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_EXP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_EXP_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_EXP_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
@@ -866,12 +834,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_log_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_LOG_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_LOG_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_LOG_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
@@ -887,12 +855,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rcp_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_RCP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RCP_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RCP_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
@@ -908,12 +876,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rcp_iflag_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_RCP_IFLAG_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RCP_IFLAG_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RCP_IFLAG_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_IFLAG_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_IFLAG_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_IFLAG_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
@@ -929,12 +897,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rsq_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_RSQ_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RSQ_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RSQ_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
@@ -950,12 +918,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sqrt_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SQRT_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SQRT_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SQRT_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
@@ -971,12 +939,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rcp_f64_e32
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_RCP_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RCP_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RCP_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
@@ -992,12 +960,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rsq_f64_e32
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_RSQ_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RSQ_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_RSQ_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
@@ -1013,12 +981,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sqrt_f64_e32
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_SQRT_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SQRT_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SQRT_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
@@ -1034,12 +1002,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sin_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SIN_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SIN_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SIN_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SIN_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SIN_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SIN_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
@@ -1055,12 +1023,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cos_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_COS_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_COS_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_COS_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_COS_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_COS_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_COS_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
@@ -1076,12 +1044,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_not_b32_e32
-    ; GCN: renamable $vgpr0 = V_NOT_B32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_NOT_B32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_NOT_B32_e32_2:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_NOT_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_NOT_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_NOT_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_NOT_B32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
@@ -1097,12 +1065,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfrev_b32_e32
-    ; GCN: renamable $vgpr0 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_BFREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_BFREV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_BFREV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFREV_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFREV_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFREV_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
@@ -1118,12 +1086,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ffbh_u32_e32
-    ; GCN: renamable $vgpr0 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FFBH_U32_e32_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FFBH_U32_e32_1:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FFBH_U32_e32_2:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_U32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_U32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_U32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
@@ -1139,12 +1107,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ffbl_b32_e32
-    ; GCN: renamable $vgpr0 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FFBL_B32_e32_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FFBL_B32_e32_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FFBL_B32_e32_2:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FFBL_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FFBL_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FFBL_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
@@ -1160,12 +1128,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ffbh_i32_e32
-    ; GCN: renamable $vgpr0 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FFBH_I32_e32_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FFBH_I32_e32_1:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FFBH_I32_e32_2:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_I32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_I32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_I32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
@@ -1181,12 +1149,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_exp_i32_f64_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FREXP_EXP_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FREXP_EXP_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FREXP_EXP_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
@@ -1202,12 +1170,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_mant_f64_e32
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_FREXP_MANT_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FREXP_MANT_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FREXP_MANT_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
@@ -1223,12 +1191,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fract_f64_e32
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_FRACT_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FRACT_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FRACT_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F64_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F64_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F64_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
@@ -1244,12 +1212,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_exp_i32_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FREXP_EXP_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FREXP_EXP_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FREXP_EXP_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
@@ -1265,12 +1233,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_mant_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FREXP_MANT_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FREXP_MANT_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FREXP_MANT_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
@@ -1286,12 +1254,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_exp_legacy_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_EXP_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_EXP_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_EXP_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_LEGACY_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_LEGACY_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_LEGACY_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
@@ -1307,12 +1275,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_log_legacy_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_LOG_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_LOG_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_LOG_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_LEGACY_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_LEGACY_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_LEGACY_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
@@ -1328,12 +1296,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sat_pk_u8_i16_e32
-    ; GCN: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SAT_PK_U8_I16_e32_:%[0-9]+]]:vgpr_32 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SAT_PK_U8_I16_e32_1:%[0-9]+]]:vgpr_32 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SAT_PK_U8_I16_e32_2:%[0-9]+]]:vgpr_32 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAT_PK_U8_I16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAT_PK_U8_I16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAT_PK_U8_I16_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
@@ -1349,12 +1317,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_accvgpr_mov_b32
-    ; GCN: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
-    ; GCN-NEXT: renamable $agpr1 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr1
-    ; GCN-NEXT: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+    ; GCN: [[V_ACCVGPR_MOV_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
+    ; GCN-NEXT: [[V_ACCVGPR_MOV_B32_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
+    ; GCN-NEXT: [[V_ACCVGPR_MOV_B32_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_MOV_B32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_MOV_B32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_MOV_B32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
     %1:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
@@ -1372,16 +1340,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_e32
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
-    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 1, undef %1:vgpr_32, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: [[V_CNDMASK_B32_e32_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 1, undef %1:vgpr_32, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: [[V_CNDMASK_B32_e32_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 1, undef %1:vgpr_32, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc
     %2:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc
@@ -1399,16 +1363,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_sdwa
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: [[V_CNDMASK_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: [[V_CNDMASK_B32_sdwa2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_sdwa]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_sdwa1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_sdwa2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
     %2:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
@@ -1426,16 +1386,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_dpp
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CNDMASK_B32_dpp:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp undef [[V_CNDMASK_B32_dpp]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: [[V_CNDMASK_B32_dpp1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp undef [[V_CNDMASK_B32_dpp1]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: [[V_CNDMASK_B32_dpp2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp undef [[V_CNDMASK_B32_dpp2]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_dpp]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_dpp1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_dpp2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
     %2:vgpr_32 = V_CNDMASK_B32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
@@ -1451,12 +1407,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cndmask_b32_e64
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec
     %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec
@@ -1472,12 +1428,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_madmk_f32
-    ; GCN: renamable $vgpr0 = nofpexcept V_MADMK_F32 1, 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MADMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MADMK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F32 1, 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MADMK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F32 2, 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MADMK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F32 3, 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MADMK_F32 1, 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MADMK_F32 2, 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1493,12 +1449,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_ADD_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1514,12 +1470,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f32_e64
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -1535,12 +1491,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f32_sdwa
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ADD_F32_sdwa:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_ADD_F32_sdwa1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_ADD_F32_sdwa2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_sdwa]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_sdwa1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_sdwa2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
@@ -1558,16 +1514,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_add_f32_dpp
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ADD_F32_dpp:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_dpp undef [[V_ADD_F32_dpp]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_ADD_F32_dpp1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_dpp undef [[V_ADD_F32_dpp1]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_ADD_F32_dpp2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_dpp undef [[V_ADD_F32_dpp2]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_dpp]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_dpp1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_dpp2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
@@ -1583,12 +1535,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sub_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SUB_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SUB_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SUB_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_SUB_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_SUB_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1604,12 +1556,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_subrev_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SUBREV_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SUBREV_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_SUBREV_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_SUBREV_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_SUBREV_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1625,12 +1577,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_legacy_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MUL_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MUL_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MUL_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LEGACY_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LEGACY_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LEGACY_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1646,12 +1598,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MUL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MUL_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MUL_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MUL_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1667,12 +1619,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_i32_i24_e32
-    ; GCN: renamable $vgpr0 = V_MUL_I32_I24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_I32_I24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MUL_I32_I24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MUL_I32_I24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_I32_I24_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_I32_I24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_I32_I24_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_I32_I24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_I32_I24_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_I32_I24_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_I32_I24_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_I32_I24_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_I32_I24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_I32_I24_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1688,12 +1640,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_i32_i24_e32
-    ; GCN: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_I32_I24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MUL_HI_I32_I24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_HI_I32_I24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_HI_I32_I24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_I24_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_I24_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_I24_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_I32_I24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_I32_I24_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1709,12 +1661,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_u32_u24_e32
-    ; GCN: renamable $vgpr0 = V_MUL_U32_U24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_U32_U24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MUL_U32_U24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MUL_U32_U24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_U32_U24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_U32_U24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_U32_U24_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_U32_U24_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_U32_U24_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_U32_U24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_U32_U24_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1730,12 +1682,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_u32_u24_e32
-    ; GCN: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_U32_U24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MUL_HI_U32_U24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_HI_U32_U24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_HI_U32_U24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_U24_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_U24_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_U24_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_U32_U24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_U32_U24_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1751,12 +1703,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MIN_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MIN_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MIN_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MIN_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MIN_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1772,12 +1724,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MAX_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAX_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAX_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAX_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAX_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1793,12 +1745,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_i32_e32
-    ; GCN: renamable $vgpr0 = V_MIN_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MIN_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MIN_I32_e32_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MIN_I32_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MIN_I32_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_I32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN_I32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1814,12 +1766,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_i32_e32
-    ; GCN: renamable $vgpr0 = V_MAX_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MAX_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MAX_I32_e32_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MAX_I32_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MAX_I32_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_I32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX_I32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1835,12 +1787,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_u32_e32
-    ; GCN: renamable $vgpr0 = V_MIN_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MIN_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MIN_U32_e32_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MIN_U32_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MIN_U32_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_U32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1856,12 +1808,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_u32_e32
-    ; GCN: renamable $vgpr0 = V_MAX_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MAX_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MAX_U32_e32_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MAX_U32_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MAX_U32_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_U32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1877,12 +1829,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshrrev_b32_e32
-    ; GCN: renamable $vgpr0 = V_LSHRREV_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_LSHRREV_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHRREV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHRREV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHRREV_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHRREV_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1898,12 +1850,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshlrev_b32_e32
-    ; GCN: renamable $vgpr0 = V_LSHLREV_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_LSHLREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHLREV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHLREV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHLREV_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHLREV_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1919,12 +1871,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ashrrev_i32_e32
-    ; GCN: renamable $vgpr0 = V_ASHRREV_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_ASHRREV_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ASHRREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ASHRREV_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ASHRREV_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ASHRREV_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ASHRREV_I32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1940,12 +1892,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_and_b32_e32
-    ; GCN: renamable $vgpr0 = V_AND_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_AND_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_AND_B32_e32_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_AND_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_AND_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_AND_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_AND_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_AND_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1961,12 +1913,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_or_b32_e32
-    ; GCN: renamable $vgpr0 = V_OR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_OR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_OR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_OR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_OR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_OR_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_OR_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_OR_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_OR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_OR_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1982,12 +1934,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_xor_b32_e32
-    ; GCN: renamable $vgpr0 = V_XOR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_XOR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_XOR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_XOR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_XOR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_XOR_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_XOR_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_XOR_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_XOR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_XOR_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2003,12 +1955,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_madak_f32
-    ; GCN: renamable $vgpr0 = nofpexcept V_MADAK_F32 1, undef $vgpr0, 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MADAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MADAK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F32 1, undef %1:vgpr_32, 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MADAK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F32 2, undef %1:vgpr_32, 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MADAK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F32 3, undef %1:vgpr_32, 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MADAK_F32 1, undef %0:vgpr_32, 1, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MADAK_F32 2, undef %0:vgpr_32, 2, implicit $exec, implicit $mode
@@ -2024,12 +1976,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_u32_e32
-    ; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_ADD_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ADD_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2045,12 +1997,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sub_u32_e32
-    ; GCN: renamable $vgpr0 = V_SUB_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_SUB_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_SUB_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_SUB_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_SUB_U32_e32_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SUB_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_SUB_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2066,12 +2018,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_subrev_u32_e32
-    ; GCN: renamable $vgpr0 = V_SUBREV_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_SUBREV_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SUBREV_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_SUBREV_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_SUBREV_U32_e32_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SUBREV_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_SUBREV_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2087,12 +2039,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfm_b32_e32
-    ; GCN: renamable $vgpr0 = V_BFM_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_BFM_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_BFM_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_BFM_B32_e32_:%[0-9]+]]:vgpr_32 = V_BFM_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_BFM_B32_e32_1:%[0-9]+]]:vgpr_32 = V_BFM_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_BFM_B32_e32_2:%[0-9]+]]:vgpr_32 = V_BFM_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFM_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFM_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFM_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFM_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFM_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2108,12 +2060,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bcnt_u32_b32_e32
-    ; GCN: renamable $vgpr0 = V_BCNT_U32_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_BCNT_U32_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_BCNT_U32_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_BCNT_U32_B32_e32_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_BCNT_U32_B32_e32_1:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_BCNT_U32_B32_e32_2:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BCNT_U32_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BCNT_U32_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BCNT_U32_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BCNT_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BCNT_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2129,12 +2081,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mbcnt_lo_u32_b32_e32
-    ; GCN: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MBCNT_LO_U32_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MBCNT_LO_U32_B32_e32_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_LO_U32_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_LO_U32_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_LO_U32_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MBCNT_LO_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MBCNT_LO_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2150,12 +2102,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mbcnt_hi_u32_b32_e32
-    ; GCN: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MBCNT_HI_U32_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MBCNT_HI_U32_B32_e32_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MBCNT_HI_U32_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MBCNT_HI_U32_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_HI_U32_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_HI_U32_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_HI_U32_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MBCNT_HI_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MBCNT_HI_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2171,12 +2123,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ldexp_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_LDEXP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_LDEXP_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_LDEXP_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_LDEXP_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_LDEXP_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2192,12 +2144,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pknorm_i16_f32_e32
-    ; GCN: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PKNORM_I16_F32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_PKNORM_I16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_I16_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_I16_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_I16_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2213,12 +2165,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pknorm_u16_f32_e32
-    ; GCN: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PKNORM_U16_F32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_PKNORM_U16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_U16_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_U16_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_U16_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2234,12 +2186,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pkrtz_f16_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_PKRTZ_F16_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKRTZ_F16_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKRTZ_F16_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKRTZ_F16_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2255,12 +2207,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pk_u16_u32_e32
-    ; GCN: renamable $vgpr0 = V_CVT_PK_U16_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PK_U16_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PK_U16_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_PK_U16_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_CVT_PK_U16_U32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_CVT_PK_U16_U32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U16_U32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U16_U32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U16_U32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PK_U16_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PK_U16_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2276,12 +2228,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pk_i16_i32_e32
-    ; GCN: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PK_I16_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_PK_I16_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_CVT_PK_I16_I32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_CVT_PK_I16_I32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_I16_I32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_I16_I32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_I16_I32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PK_I16_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PK_I16_I32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2297,12 +2249,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_legacy_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MIN_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MIN_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MIN_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_LEGACY_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_LEGACY_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_LEGACY_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2318,12 +2270,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_legacy_f32_e32
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MAX_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAX_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAX_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_LEGACY_F32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_LEGACY_F32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_LEGACY_F32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2339,12 +2291,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshr_b32_e32
-    ; GCN: renamable $vgpr0 = V_LSHR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_LSHR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_LSHR_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHR_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHR_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHR_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHR_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2360,12 +2312,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshl_b32_e32
-    ; GCN: renamable $vgpr0 = V_LSHL_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHL_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_LSHL_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_LSHL_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHL_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHL_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHL_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHL_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHL_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHL_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHL_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2381,12 +2333,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ashr_i32_e32
-    ; GCN: renamable $vgpr0 = V_ASHR_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_ASHR_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_ASHR_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ASHR_I32_e32_:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ASHR_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ASHR_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHR_I32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHR_I32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHR_I32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ASHR_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ASHR_I32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2402,12 +2354,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_xnor_b32_e32
-    ; GCN: renamable $vgpr0 = V_XNOR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_XNOR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_XNOR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_XNOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XNOR_B32_e32 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_XNOR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_XNOR_B32_e32 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_XNOR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_XNOR_B32_e32 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_XNOR_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_XNOR_B32_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_XNOR_B32_e32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_XNOR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_XNOR_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2423,12 +2375,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fmamk_f32
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMAMK_F32 1, 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMAMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F32 1, 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMAMK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F32 2, 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMAMK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F32 3, 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMAMK_F32 1, 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMAMK_F32 2, 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2444,12 +2396,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fmaak_f32
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMAAK_F32 1, undef $vgpr0, 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMAAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAAK_F32 1, undef %1:vgpr_32, 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMAAK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAAK_F32 2, undef %1:vgpr_32, 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMAAK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAAK_F32 3, undef %1:vgpr_32, 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMAAK_F32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMAAK_F32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMAAK_F32_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMAAK_F32 1, undef %0:vgpr_32, 1, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMAAK_F32 2, undef %0:vgpr_32, 2, implicit $exec, implicit $mode
@@ -2465,12 +2417,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_legacy_f32_e64
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MAD_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAD_LEGACY_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAD_LEGACY_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_LEGACY_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_LEGACY_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_LEGACY_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2486,12 +2438,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_f32_e64
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2507,12 +2459,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fma_legacy_f32_e64
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FMA_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMA_LEGACY_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMA_LEGACY_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_LEGACY_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_LEGACY_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_LEGACY_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2528,12 +2480,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fma_f32_e64
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMA_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMA_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2549,12 +2501,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_i32_i24_e64
-    ; GCN: renamable $vgpr0 = V_MAD_I32_I24_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MAD_I32_I24_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MAD_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MAD_I32_I24_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_MAD_I32_I24_e64_1:%[0-9]+]]:vgpr_32 = V_MAD_I32_I24_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_MAD_I32_I24_e64_2:%[0-9]+]]:vgpr_32 = V_MAD_I32_I24_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_I32_I24_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_I32_I24_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_I32_I24_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAD_I32_I24_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_MAD_I32_I24_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -2570,12 +2522,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_u32_u24_e64
-    ; GCN: renamable $vgpr0 = V_MAD_U32_U24_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MAD_U32_U24_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MAD_U32_U24_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MAD_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MAD_U32_U24_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_MAD_U32_U24_e64_1:%[0-9]+]]:vgpr_32 = V_MAD_U32_U24_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_MAD_U32_U24_e64_2:%[0-9]+]]:vgpr_32 = V_MAD_U32_U24_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_U32_U24_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_U32_U24_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_U32_U24_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAD_U32_U24_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_MAD_U32_U24_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -2591,12 +2543,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lerp_u8_e64
-    ; GCN: renamable $vgpr0 = V_LERP_U8_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_LERP_U8_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_LERP_U8_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_LERP_U8_e64_:%[0-9]+]]:vgpr_32 = V_LERP_U8_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LERP_U8_e64_1:%[0-9]+]]:vgpr_32 = V_LERP_U8_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LERP_U8_e64_2:%[0-9]+]]:vgpr_32 = V_LERP_U8_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LERP_U8_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LERP_U8_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LERP_U8_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LERP_U8_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LERP_U8_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2612,12 +2564,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fma_f64_e64
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMA_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMA_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2633,12 +2585,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f64_e64
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2654,12 +2606,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_f64_e64
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MUL_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MUL_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2675,12 +2627,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_f64_e64
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2696,12 +2648,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_f64_e64
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2717,12 +2669,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_lo_u32_e64
-    ; GCN: renamable $vgpr0 = V_MUL_LO_U32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MUL_LO_U32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_LO_U32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_LO_U32_e64 2, undef %0:vgpr_32, implicit $exec
@@ -2738,12 +2690,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_u32_e64
-    ; GCN: renamable $vgpr0 = V_MUL_HI_U32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_U32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_U32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_HI_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_HI_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_U32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_U32_e64 2, undef %0:vgpr_32, implicit $exec
@@ -2759,12 +2711,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_lo_i32_e64
-    ; GCN: renamable $vgpr0 = V_MUL_LO_I32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_I32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MUL_LO_I32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MUL_LO_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_LO_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_LO_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_I32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_I32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_I32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_LO_I32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_LO_I32_e64 2, undef %0:vgpr_32, implicit $exec
@@ -2780,12 +2732,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_i32_e64
-    ; GCN: renamable $vgpr0 = V_MUL_HI_I32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_I32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_I32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_HI_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MUL_HI_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_I32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_I32_e64 2, undef %0:vgpr_32, implicit $exec
@@ -2801,12 +2753,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubeid_f32_e64
-    ; GCN: renamable $vgpr0 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CUBEID_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CUBEID_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CUBEID_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEID_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEID_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEID_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2822,12 +2774,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubesc_f32_e64
-    ; GCN: renamable $vgpr0 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CUBESC_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CUBESC_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CUBESC_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBESC_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBESC_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBESC_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2843,12 +2795,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubetc_f32_e64
-    ; GCN: renamable $vgpr0 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CUBETC_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CUBETC_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CUBETC_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBETC_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBETC_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBETC_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2864,12 +2816,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubema_f32_e64
-    ; GCN: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CUBEMA_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CUBEMA_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CUBEMA_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEMA_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEMA_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEMA_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2885,12 +2837,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfe_u32_e64
-    ; GCN: renamable $vgpr0 = V_BFE_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_BFE_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_BFE_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_BFE_U32_e64_1:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_BFE_U32_e64_2:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_U32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_U32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_U32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFE_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFE_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2906,12 +2858,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfe_i32_e64
-    ; GCN: renamable $vgpr0 = V_BFE_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_BFE_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_BFE_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_BFE_I32_e64_1:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_BFE_I32_e64_2:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_I32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_I32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_I32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFE_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFE_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2927,12 +2879,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfi_b32_e64
-    ; GCN: renamable $vgpr0 = V_BFI_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_BFI_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_BFI_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_BFI_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_BFI_B32_e64_2:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFI_B32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFI_B32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_BFI_B32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFI_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFI_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2948,12 +2900,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_alignbit_b32_e64
-    ; GCN: renamable $vgpr0 = V_ALIGNBIT_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_ALIGNBIT_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_ALIGNBIT_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ALIGNBIT_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ALIGNBIT_B32_e64_2:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBIT_B32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBIT_B32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBIT_B32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ALIGNBIT_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ALIGNBIT_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2969,12 +2921,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_alignbyte_b32_e64
-    ; GCN: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_ALIGNBYTE_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ALIGNBYTE_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBYTE_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ALIGNBYTE_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBYTE_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ALIGNBYTE_B32_e64_2:%[0-9]+]]:vgpr_32 = V_ALIGNBYTE_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBYTE_B32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBYTE_B32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBYTE_B32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ALIGNBYTE_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ALIGNBYTE_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2990,12 +2942,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min3_i32_e64
-    ; GCN: renamable $vgpr0 = V_MIN3_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN3_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MIN3_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MIN3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN3_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MIN3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN3_I32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MIN3_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN3_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_I32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_I32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_I32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3011,12 +2963,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min3_u32_e64
-    ; GCN: renamable $vgpr0 = V_MIN3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MIN3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MIN3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MIN3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MIN3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_U32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_U32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_U32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3032,12 +2984,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max3_i32_e64
-    ; GCN: renamable $vgpr0 = V_MAX3_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX3_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MAX3_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MAX3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MAX3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MAX3_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_I32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_I32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_I32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3053,12 +3005,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max3_u32_e64
-    ; GCN: renamable $vgpr0 = V_MAX3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MAX3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MAX3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MAX3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MAX3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_U32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_U32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_U32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3074,12 +3026,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_med3_i32_e64
-    ; GCN: renamable $vgpr0 = V_MED3_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MED3_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MED3_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MED3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MED3_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_I32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_I32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_I32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MED3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MED3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3095,12 +3047,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_med3_u32_e64
-    ; GCN: renamable $vgpr0 = V_MED3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MED3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MED3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MED3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_MED3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_U32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_U32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_U32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MED3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MED3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3116,12 +3068,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min3_f32_e64
-    ; GCN: renamable $vgpr0 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MIN3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MIN3_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MIN3_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3137,12 +3089,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max3_f32_e64
-    ; GCN: renamable $vgpr0 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MAX3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAX3_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MAX3_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3158,12 +3110,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_med3_f32_e64
-    ; GCN: renamable $vgpr0 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MED3_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MED3_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3179,12 +3131,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_u8_e64
-    ; GCN: renamable $vgpr0 = V_SAD_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_SAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_SAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SAD_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_U8_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_SAD_U8_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_U8_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_SAD_U8_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_U8_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U8_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U8_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U8_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3200,12 +3152,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_hi_u8_e64
-    ; GCN: renamable $vgpr0 = V_SAD_HI_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_SAD_HI_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_SAD_HI_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SAD_HI_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_SAD_HI_U8_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_SAD_HI_U8_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_HI_U8_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_HI_U8_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_HI_U8_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_HI_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_HI_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3221,12 +3173,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_u16_e64
-    ; GCN: renamable $vgpr0 = V_SAD_U16_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_SAD_U16_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_SAD_U16_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SAD_U16_e64_:%[0-9]+]]:vgpr_32 = V_SAD_U16_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_SAD_U16_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_U16_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_SAD_U16_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_U16_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U16_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_U16_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_U16_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3242,12 +3194,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_u32_e64
-    ; GCN: renamable $vgpr0 = V_SAD_U32_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_SAD_U32_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_SAD_U32_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_SAD_U32_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_SAD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_U32_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_SAD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_U32_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_U32_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_U32_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3263,12 +3215,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pk_u8_f32_e64
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_CVT_PK_U8_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_PK_U8_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_CVT_PK_U8_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U8_F32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U8_F32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U8_F32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode
@@ -3284,12 +3236,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_div_fixup_f64_e64
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_DIV_FIXUP_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_DIV_FIXUP_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_DIV_FIXUP_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_DIV_FIXUP_F64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_DIV_FIXUP_F64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_DIV_FIXUP_F64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -3305,12 +3257,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ldexp_f64_e64
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_LDEXP_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_LDEXP_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3326,12 +3278,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_msad_u8_e64
-    ; GCN: renamable $vgpr0 = V_MSAD_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MSAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MSAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_MSAD_U8_e64_:%[0-9]+]]:vgpr_32 = V_MSAD_U8_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_MSAD_U8_e64_1:%[0-9]+]]:vgpr_32 = V_MSAD_U8_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_MSAD_U8_e64_2:%[0-9]+]]:vgpr_32 = V_MSAD_U8_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MSAD_U8_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MSAD_U8_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MSAD_U8_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MSAD_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_MSAD_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3347,12 +3299,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_trig_preop_f64_e64
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_TRIG_PREOP_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_TRIG_PREOP_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_TRIG_PREOP_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_TRIG_PREOP_F64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_TRIG_PREOP_F64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_TRIG_PREOP_F64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3368,12 +3320,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshlrev_b64_e64
-    ; GCN: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 1, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 1, undef %1:vreg_64_align2, implicit $exec
+    ; GCN-NEXT: [[V_LSHLREV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 2, undef %1:vreg_64_align2, implicit $exec
+    ; GCN-NEXT: [[V_LSHLREV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 3, undef %1:vreg_64_align2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = V_LSHLREV_B64_e64 1, undef %0:vreg_64_align2, implicit $exec
     %2:vreg_64_align2 = V_LSHLREV_B64_e64 2, undef %0:vreg_64_align2, implicit $exec
@@ -3389,12 +3341,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshrrev_b64_e64
-    ; GCN: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 1, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_LSHRREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHRREV_B64_e64 1, undef %1:vreg_64_align2, implicit $exec
+    ; GCN-NEXT: [[V_LSHRREV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = V_LSHRREV_B64_e64 2, undef %1:vreg_64_align2, implicit $exec
+    ; GCN-NEXT: [[V_LSHRREV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = V_LSHRREV_B64_e64 3, undef %1:vreg_64_align2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = V_LSHRREV_B64_e64 1, undef %0:vreg_64_align2, implicit $exec
     %2:vreg_64_align2 = V_LSHRREV_B64_e64 2, undef %0:vreg_64_align2, implicit $exec
@@ -3410,12 +3362,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ashrrev_i64_e64
-    ; GCN: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 1, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_ASHRREV_I64_e64 2, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 3, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64_align2 = V_ASHRREV_I64_e64 1, undef %1:vreg_64_align2, implicit $exec
+    ; GCN-NEXT: [[V_ASHRREV_I64_e64_1:%[0-9]+]]:vreg_64_align2 = V_ASHRREV_I64_e64 2, undef %1:vreg_64_align2, implicit $exec
+    ; GCN-NEXT: [[V_ASHRREV_I64_e64_2:%[0-9]+]]:vreg_64_align2 = V_ASHRREV_I64_e64 3, undef %1:vreg_64_align2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I64_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I64_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I64_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = V_ASHRREV_I64_e64 1, undef %0:vreg_64_align2, implicit $exec
     %2:vreg_64_align2 = V_ASHRREV_I64_e64 2, undef %0:vreg_64_align2, implicit $exec
@@ -3431,12 +3383,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_perm_b32_e64
-    ; GCN: renamable $vgpr0 = V_PERM_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_PERM_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_PERM_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_PERM_B32_e64_1:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_PERM_B32_e64_2:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PERM_B32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PERM_B32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PERM_B32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_PERM_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_PERM_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3452,12 +3404,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add3_u32_e64
-    ; GCN: renamable $vgpr0 = V_ADD3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_ADD3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_ADD3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ADD3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ADD3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD3_U32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD3_U32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD3_U32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ADD3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3473,12 +3425,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_and_or_b32_e64
-    ; GCN: renamable $vgpr0 = V_AND_OR_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_AND_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_AND_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_AND_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_AND_OR_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_AND_OR_B32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_AND_OR_B32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_AND_OR_B32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_AND_OR_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_AND_OR_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3494,12 +3446,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_or3_b32_e64
-    ; GCN: renamable $vgpr0 = V_OR3_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_OR3_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_OR3_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_OR3_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_OR3_B32_e64_2:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_OR3_B32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_OR3_B32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_OR3_B32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_OR3_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_OR3_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3515,12 +3467,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_xad_u32_e64
-    ; GCN: renamable $vgpr0 = V_XAD_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_XAD_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_XAD_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_XAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_XAD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_XAD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_XAD_U32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_XAD_U32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_XAD_U32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_XAD_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_XAD_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3536,12 +3488,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_i32_e64
-    ; GCN: renamable $vgpr0 = V_ADD_I32_e64 1, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_ADD_I32_e64 2, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_ADD_I32_e64 3, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e64 1, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_ADD_I32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e64 2, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_ADD_I32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e64 3, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_I32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_I32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_I32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD_I32_e64 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_ADD_I32_e64 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3557,12 +3509,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_lshl_u32_e64
-    ; GCN: renamable $vgpr0 = V_ADD_LSHL_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_ADD_LSHL_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_ADD_LSHL_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_ADD_LSHL_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_LSHL_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ADD_LSHL_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_LSHL_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_ADD_LSHL_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_LSHL_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_LSHL_U32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_LSHL_U32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_LSHL_U32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD_LSHL_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ADD_LSHL_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3578,12 +3530,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sub_i32_e64
-    ; GCN: renamable $vgpr0 = V_SUB_I32_e64 1, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_SUB_I32_e64 2, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_SUB_I32_e64 3, undef $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 1, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_SUB_I32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 2, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: [[V_SUB_I32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 3, undef %1:vgpr_32, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_I32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_I32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_I32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SUB_I32_e64 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SUB_I32_e64 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3599,12 +3551,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshl_add_u32_e64
-    ; GCN: renamable $vgpr0 = V_LSHL_ADD_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHL_ADD_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_LSHL_ADD_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHL_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHL_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_ADD_U32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_ADD_U32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_ADD_U32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHL_ADD_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHL_ADD_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3620,12 +3572,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshl_or_b32_e64
-    ; GCN: renamable $vgpr0 = V_LSHL_OR_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHL_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_LSHL_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHL_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: [[V_LSHL_OR_B32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_OR_B32_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_OR_B32_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_OR_B32_e64_2]]
     ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHL_OR_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHL_OR_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3645,13 +3597,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_lshlrev_b16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e32 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e32 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e32 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_LSHLREV_B16_e32_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_LSHLREV_B16_e32_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_LSHLREV_B16_e32_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_LSHLREV_B16_e32 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHLREV_B16_e32 2, %0:vgpr_32, implicit $exec
@@ -3670,13 +3623,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_lshlrev_b16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e64 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e64 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e64 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_LSHLREV_B16_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_LSHLREV_B16_e64_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_LSHLREV_B16_e64 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHLREV_B16_e64 2, %0:vgpr_32, implicit $exec
@@ -3696,13 +3650,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_lshrrev_b16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e32 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e32 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e32 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_LSHRREV_B16_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_LSHRREV_B16_e32_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_LSHRREV_B16_e32_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_LSHRREV_B16_e32 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHRREV_B16_e32 2, %0:vgpr_32, implicit $exec
@@ -3721,13 +3676,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_lshrrev_b16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e64 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e64 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e64 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_LSHRREV_B16_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_LSHRREV_B16_e64_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_LSHRREV_B16_e64 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHRREV_B16_e64 2, %0:vgpr_32, implicit $exec
@@ -3747,13 +3703,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_ashrrev_i16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e32 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e32 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e32 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_ASHRREV_I16_e32_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_ASHRREV_I16_e32_1:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e32 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_ASHRREV_I16_e32_2:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e32 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_ASHRREV_I16_e32 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ASHRREV_I16_e32 2, %0:vgpr_32, implicit $exec
@@ -3772,13 +3729,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_ashrrev_i16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e64 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e64 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e64 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_ASHRREV_I16_e64_1:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_ASHRREV_I16_e64_2:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_ASHRREV_I16_e64 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ASHRREV_I16_e64 2, %0:vgpr_32, implicit $exec
@@ -3798,13 +3756,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_add_u16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e32 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e32 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e32 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_ADD_U16_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_ADD_U16_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U16_e32 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_ADD_U16_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U16_e32 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_ADD_U16_e32 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ADD_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -3824,13 +3783,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_add_u16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e64 1, $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e64 2, $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e64 3, $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 1, [[COPY]], 0, implicit $exec
+    ; GCN-NEXT: [[V_ADD_U16_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 2, [[COPY]], 0, implicit $exec
+    ; GCN-NEXT: [[V_ADD_U16_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 3, [[COPY]], 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_ADD_U16_e64 1, %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_ADD_U16_e64 2, %0:vgpr_32, 0, implicit $exec
@@ -3850,13 +3810,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_sub_u16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e32 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e32 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e32 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_SUB_U16_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_SUB_U16_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U16_e32 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_SUB_U16_e32_2:%[0-9]+]]:vgpr_32 = V_SUB_U16_e32 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_SUB_U16_e32 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_SUB_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -3876,13 +3837,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_sub_u16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e64 1, $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e64 2, $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e64 3, $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 1, [[COPY]], 0, implicit $exec
+    ; GCN-NEXT: [[V_SUB_U16_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 2, [[COPY]], 0, implicit $exec
+    ; GCN-NEXT: [[V_SUB_U16_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 3, [[COPY]], 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_SUB_U16_e64 1, %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SUB_U16_e64 2, %0:vgpr_32, 0, implicit $exec
@@ -3902,13 +3864,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_subrev_u16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e32 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e32 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e32 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_SUBREV_U16_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_SUBREV_U16_e32_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e32 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_SUBREV_U16_e32_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e32 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_SUBREV_U16_e32 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_SUBREV_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -3928,13 +3891,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_subrev_u16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e64 1, $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e64 2, $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e64 3, $vgpr0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_SUBREV_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e64 1, [[COPY]], 0, implicit $exec
+    ; GCN-NEXT: [[V_SUBREV_U16_e64_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e64 2, [[COPY]], 0, implicit $exec
+    ; GCN-NEXT: [[V_SUBREV_U16_e64_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e64 3, [[COPY]], 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_SUBREV_U16_e64 1, %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SUBREV_U16_e64 2, %0:vgpr_32, 0, implicit $exec
@@ -3954,13 +3918,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_min_u16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e32 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e32 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e32 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MIN_U16_e32_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MIN_U16_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e32 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MIN_U16_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_U16_e32 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MIN_U16_e32 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -3980,13 +3945,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_min_u16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e64 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e64 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e64 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MIN_U16_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MIN_U16_e64 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN_U16_e64 2, %0:vgpr_32, implicit $exec
@@ -4006,13 +3972,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_max_u16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e32 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e32 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e32 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MAX_U16_e32_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MAX_U16_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e32 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MAX_U16_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_U16_e32 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MAX_U16_e32 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -4032,13 +3999,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_max_u16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e64 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e64 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e64 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MAX_U16_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MAX_U16_e64 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX_U16_e64 2, %0:vgpr_32, implicit $exec
@@ -4058,13 +4026,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_min_i16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e32 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e32 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e32 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MIN_I16_e32_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MIN_I16_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e32 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MIN_I16_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_I16_e32 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MIN_I16_e32 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN_I16_e32 2, %0:vgpr_32, implicit $exec
@@ -4084,13 +4053,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_min_i16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e64 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e64 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e64 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MIN_I16_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MIN_I16_e64 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN_I16_e64 2, %0:vgpr_32, implicit $exec
@@ -4110,13 +4080,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_max_i16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e32 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e32 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e32 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MAX_I16_e32_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MAX_I16_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e32 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MAX_I16_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_I16_e32 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MAX_I16_e32 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX_I16_e32 2, %0:vgpr_32, implicit $exec
@@ -4136,13 +4107,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_max_i16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e64 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e64 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e64 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MAX_I16_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MAX_I16_e64 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX_I16_e64 2, %0:vgpr_32, implicit $exec
@@ -4162,13 +4134,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_mul_lo_u16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e32 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e32 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e32 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MUL_LO_U16_e32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MUL_LO_U16_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e32 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MUL_LO_U16_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e32 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MUL_LO_U16_e32 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_LO_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -4188,13 +4161,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_mul_lo_u16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e64 1, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e64 2, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e64 3, $vgpr0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MUL_LO_U16_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e64 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MUL_LO_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e64 2, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_MUL_LO_U16_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e64 3, [[COPY]], implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MUL_LO_U16_e64 1, %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_LO_U16_e64 2, %0:vgpr_32, implicit $exec
@@ -4214,13 +4188,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_add_f16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_ADD_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_ADD_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_ADD_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_ADD_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_ADD_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4240,13 +4215,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_add_f16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_ADD_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_ADD_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4266,13 +4242,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_sub_f16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_SUB_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_SUB_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_SUB_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_SUB_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_SUB_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4292,13 +4269,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_sub_f16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_SUB_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_SUB_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_SUB_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4318,13 +4296,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_subrev_f16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_SUBREV_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_SUBREV_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_SUBREV_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_SUBREV_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_SUBREV_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4344,13 +4323,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_subrev_f16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_SUBREV_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_SUBREV_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_SUBREV_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4370,13 +4350,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_mul_f16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MUL_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_MUL_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_MUL_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4396,13 +4377,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_mul_f16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4422,13 +4404,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_ldexp_f16_e32
     ; GCN: liveins: $vgpr0, $vgpr1
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_LDEXP_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_LDEXP_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_LDEXP_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, %0, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, %0, implicit $mode, implicit $exec
@@ -4448,13 +4431,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_ldexp_f16_e64
     ; GCN: liveins: $vgpr0, $vgpr1
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e64 0, $vgpr0, 0, 1, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e64 0, $vgpr0, 0, 1, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e64 0, $vgpr0, 0, 1, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_LDEXP_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, 1, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_LDEXP_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, 1, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_LDEXP_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, 1, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, %0, 0, 1, 0, 0, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, %0, 0, 1, 0, 0, implicit $mode, implicit $exec
@@ -4474,13 +4458,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_min_f16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MIN_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MIN_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MIN_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_MIN_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_MIN_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4500,13 +4485,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_min_f16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MIN_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MIN_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4526,13 +4512,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_max_f16_e32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MAX_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MAX_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MAX_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_MAX_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_MAX_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4552,13 +4539,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_max_f16_e64
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MAX_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MAX_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e64_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e64_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e64_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4578,13 +4566,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_madak_f16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F16 1, $vgpr0, 1, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F16 2, $vgpr0, 2, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F16 3, $vgpr0, 3, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F16 1, [[COPY]], 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MADAK_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F16 2, [[COPY]], 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MADAK_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F16 3, [[COPY]], 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_MADAK_F16 1, %0, 1, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MADAK_F16 2, %0, 2, implicit $exec, implicit $mode
@@ -4604,13 +4593,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_madmk_f16
     ; GCN: liveins: $vgpr0, $vgpr1
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F16 1, 1, $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F16 2, 2, $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F16 3, 3, $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F16 1, 1, [[COPY]], implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MADMK_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F16 2, 2, [[COPY]], implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_MADMK_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F16 3, 3, [[COPY]], implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_MADMK_F16 1, 1, %0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MADMK_F16 2, 2, %0, implicit $exec, implicit $mode
@@ -4630,13 +4620,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_fmamk_f16
     ; GCN: liveins: $vgpr0, $vgpr1
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F16 1, 1, $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F16 2, 2, $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F16 3, 3, $vgpr0, implicit $exec, implicit $mode
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F16 1, 1, [[COPY]], implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMAMK_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F16 2, 2, [[COPY]], implicit $exec, implicit $mode
+    ; GCN-NEXT: [[V_FMAMK_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F16 3, 3, [[COPY]], implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_FMAMK_F16 1, 1, %0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMAMK_F16 2, 2, %0, implicit $exec, implicit $mode
@@ -4656,13 +4647,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_mad_i16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_I16 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_I16 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_I16 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_MAD_I16_:%[0-9]+]]:vgpr_32 = V_PK_MAD_I16 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MAD_I16_1:%[0-9]+]]:vgpr_32 = V_PK_MAD_I16 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MAD_I16_2:%[0-9]+]]:vgpr_32 = V_PK_MAD_I16 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_I16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_I16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_I16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_MAD_I16 8, %0, 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4683,13 +4675,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_mad_u16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_U16 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_U16 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_U16 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_MAD_U16_:%[0-9]+]]:vgpr_32 = V_PK_MAD_U16 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MAD_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MAD_U16 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MAD_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MAD_U16 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_U16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_U16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_U16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_MAD_U16 8, %0, 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4710,13 +4703,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_add_u16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_ADD_U16_:%[0-9]+]]:vgpr_32 = V_PK_ADD_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_ADD_U16_1:%[0-9]+]]:vgpr_32 = V_PK_ADD_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_ADD_U16_2:%[0-9]+]]:vgpr_32 = V_PK_ADD_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_U16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_U16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_U16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_ADD_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4737,13 +4731,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_add_i16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_ADD_I16_:%[0-9]+]]:vgpr_32 = V_PK_ADD_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_ADD_I16_1:%[0-9]+]]:vgpr_32 = V_PK_ADD_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_ADD_I16_2:%[0-9]+]]:vgpr_32 = V_PK_ADD_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_I16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_I16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_I16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_ADD_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4764,13 +4759,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_mul_lo_u16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MUL_LO_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MUL_LO_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MUL_LO_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_MUL_LO_U16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_LO_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MUL_LO_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MUL_LO_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MUL_LO_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MUL_LO_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_LO_U16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_LO_U16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_LO_U16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_MUL_LO_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4791,13 +4787,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_min_i16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_MIN_I16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MIN_I16_1:%[0-9]+]]:vgpr_32 = V_PK_MIN_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MIN_I16_2:%[0-9]+]]:vgpr_32 = V_PK_MIN_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_I16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_I16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_I16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_MIN_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4818,13 +4815,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_max_i16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_MAX_I16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MAX_I16_1:%[0-9]+]]:vgpr_32 = V_PK_MAX_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MAX_I16_2:%[0-9]+]]:vgpr_32 = V_PK_MAX_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_I16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_I16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_I16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_MAX_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4845,13 +4843,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_min_u16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_MIN_U16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MIN_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MIN_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MIN_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MIN_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_U16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_U16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_U16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_MIN_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4872,13 +4871,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_max_u16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_MAX_U16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MAX_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MAX_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MAX_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MAX_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_U16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_U16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_U16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_MAX_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4899,13 +4899,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_sub_u16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_SUB_U16_:%[0-9]+]]:vgpr_32 = V_PK_SUB_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_SUB_U16_1:%[0-9]+]]:vgpr_32 = V_PK_SUB_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_SUB_U16_2:%[0-9]+]]:vgpr_32 = V_PK_SUB_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_U16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_U16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_U16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_SUB_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4926,13 +4927,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_sub_i16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_SUB_I16_:%[0-9]+]]:vgpr_32 = V_PK_SUB_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_SUB_I16_1:%[0-9]+]]:vgpr_32 = V_PK_SUB_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_SUB_I16_2:%[0-9]+]]:vgpr_32 = V_PK_SUB_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_I16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_I16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_I16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_SUB_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4953,13 +4955,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_lshlrev_b16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHLREV_B16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHLREV_B16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHLREV_B16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_LSHLREV_B16_1:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_LSHLREV_B16_2:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHLREV_B16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHLREV_B16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHLREV_B16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_LSHLREV_B16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4980,13 +4983,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_ashrrev_i16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ASHRREV_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ASHRREV_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ASHRREV_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_ASHRREV_I16_1:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_ASHRREV_I16_2:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ASHRREV_I16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ASHRREV_I16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ASHRREV_I16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_ASHRREV_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -5007,13 +5011,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_lshrrev_b16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHRREV_B16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHRREV_B16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHRREV_B16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_LSHRREV_B16_1:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_LSHRREV_B16_2:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHRREV_B16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHRREV_B16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHRREV_B16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_LSHRREV_B16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -5034,13 +5039,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_add_f16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_ADD_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_ADD_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_ADD_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_ADD_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_ADD_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5064,19 +5070,14 @@ body:             |
     ; GCN-LABEL: name: test_no_remat_v_pk_add_f16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = V_PK_ADD_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_ADD_F16_1:%[0-9]+]]:vgpr_32 = V_PK_ADD_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_ADD_F16_2:%[0-9]+]]:vgpr_32 = V_PK_ADD_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = V_PK_ADD_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5097,13 +5098,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_mul_f16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MUL_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MUL_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MUL_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_MUL_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_MUL_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = nofpexcept V_PK_MUL_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5124,13 +5126,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_min_f16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MIN_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MIN_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MIN_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_MIN_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_MIN_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_F16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_F16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_F16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = nofpexcept V_PK_MIN_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5151,13 +5154,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_max_f16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MAX_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MAX_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MAX_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_MAX_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_MAX_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_F16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_F16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_F16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
 
     %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5178,13 +5182,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_fma_f16
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_FMA_F16 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_FMA_F16 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_FMA_F16 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_PK_FMA_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_FMA_F16 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_FMA_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_FMA_F16 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_FMA_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_FMA_F16 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F16_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F16_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F16_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_PK_FMA_F16 8, %0, 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_PK_FMA_F16 9, %0, 9, %0, 9, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5205,13 +5210,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_mad_mix_f32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_MIX_F32 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_MIX_F32 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_MIX_F32 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MAD_MIX_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_MIX_F32 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MAD_MIX_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_MIX_F32 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MAD_MIX_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_MIX_F32 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_MIX_F32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_MIX_F32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_MIX_F32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_MAD_MIX_F32 8, %0, 8, %0, 8, %0, 0, 0, 0, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_MAD_MIX_F32 9, %0, 9, %0, 9, %0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5231,13 +5237,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_fma_mix_f32
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_MIX_F32 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_MIX_F32 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_MIX_F32 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_FMA_MIX_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_FMA_MIX_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIX_F32 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_FMA_MIX_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIX_F32 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_MIX_F32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_MIX_F32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_MIX_F32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %0, 8, %0, 8, %0, 0, 0, 0, implicit $mode, implicit $exec
     %2:vgpr_32 = nofpexcept V_FMA_MIX_F32 9, %0, 9, %0, 9, %0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5257,13 +5264,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_fma_f32
     ; GCN: liveins: $vgpr0_vgpr1
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+    ; GCN-NEXT: [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_FMA_F32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_FMA_F32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vreg_64_align2 = COPY $vgpr0_vgpr1
     %1:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
     %2:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5285,19 +5293,14 @@ body:             |
     ; GCN-LABEL: name: test_no_remat_v_pk_fma_f32
     ; GCN: liveins: $vgpr0_vgpr1
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+    ; GCN-NEXT: [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_FMA_F32_1:%[0-9]+]]:vreg_64_align2 = V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_FMA_F32_2:%[0-9]+]]:vreg_64_align2 = V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vreg_64_align2 = COPY $vgpr0_vgpr1
     %1:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
     %2:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5317,13 +5320,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_mul_f32
     ; GCN: liveins: $vgpr0_vgpr1
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+    ; GCN-NEXT: [[V_PK_MUL_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_MUL_F32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_MUL_F32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vreg_64_align2 = COPY $vgpr0_vgpr1
     %1:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
     %2:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5343,13 +5347,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_add_f32
     ; GCN: liveins: $vgpr0_vgpr1
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+    ; GCN-NEXT: [[V_PK_ADD_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_ADD_F32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_PK_ADD_F32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vreg_64_align2 = COPY $vgpr0_vgpr1
     %1:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
     %2:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5369,13 +5374,14 @@ body:             |
     ; GCN-LABEL: name: test_remat_v_pk_mov_b32
     ; GCN: liveins: $vgpr0_vgpr1
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 9, $vgpr0_vgpr1, 9, $vgpr0_vgpr1, 12, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 10, $vgpr0_vgpr1, 10, $vgpr0_vgpr1, 13, 0, 0, 0, 0, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+    ; GCN-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MOV_B32_1:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 9, [[COPY]], 9, [[COPY]], 12, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: [[V_PK_MOV_B32_2:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 10, [[COPY]], 10, [[COPY]], 13, 0, 0, 0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MOV_B32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MOV_B32_1]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MOV_B32_2]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:vreg_64_align2 = COPY $vgpr0_vgpr1
     %1:vreg_64_align2 = V_PK_MOV_B32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $exec
     %2:vreg_64_align2 = V_PK_MOV_B32 9, %0, 9, %0, 12, 0, 0, 0, 0, implicit $exec
@@ -5395,12 +5401,12 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_subreg_def
-    ; GCN: renamable $vgpr1 = V_MOV_B32_e32 1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 2, implicit $exec
-    ; GCN-NEXT: S_NOP 0, implicit renamable $vgpr0_vgpr1
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GCN-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 2, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]]
+    ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
     undef %1.sub0:vreg_64 = V_MOV_B32_e32 2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
index 5f291489848fe..477183e63ff16 100644
--- a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
 
+; XFAIL: *
+
 ; This was a negative test to catch an extreme case when all options are exhausted
 ; while trying to spill SGPRs to memory. After we enabled SGPR spills into virtual VGPRs
 ; the edge case won't arise and the test would always compile.
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
index 17a19116735e4..14a02d4d2dcec 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
@@ -1,19 +1,19 @@
 ; REQUIRES: asserts
 
 ; RUN: llc -verify-machineinstrs=0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT %s
-; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=greedy -vgpr-regalloc=greedy -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT %s
+; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=greedy -wwm-regalloc=greedy -vgpr-regalloc=greedy -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT %s
 
 ; RUN: llc -verify-machineinstrs=0 -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=O0 %s
 
-; RUN: llc -verify-machineinstrs=0 -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT-BASIC %s
+; RUN: llc -verify-machineinstrs=0 -wwm-regalloc=basic -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT-BASIC %s
 ; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=BASIC-DEFAULT %s
-; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=basic -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=BASIC-BASIC %s
+; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=basic -wwm-regalloc=basic -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=BASIC-BASIC %s
 
 ; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
 ; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=fast -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
 
 
-; REGALLOC: -regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc
+; REGALLOC: -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, and -vgpr-regalloc
 
 ; DEFAULT: Greedy Register Allocator
 ; DEFAULT-NEXT: Virtual Register Rewriter
@@ -23,6 +23,11 @@
 ; DEFAULT-NEXT: SI Pre-allocate WWM Registers
 ; DEFAULT-NEXT: Greedy Register Allocator
 ; DEFAULT-NEXT: SI Lower WWM Copies
+; DEFAULT-NEXT: Virtual Register Rewriter
+; DEFAULT-NEXT: AMDGPU Reserve WWM Registers
+; DEFAULT-NEXT: Virtual Register Map
+; DEFAULT-NEXT: Live Register Matrix
+; DEFAULT-NEXT: Greedy Register Allocator
 ; DEFAULT-NEXT: GCN NSA Reassign
 ; DEFAULT-NEXT: Virtual Register Rewriter
 ; DEFAULT-NEXT: AMDGPU Mark Last Scratch Load
@@ -37,6 +42,8 @@
 ; O0-NEXT: SI Pre-allocate WWM Registers
 ; O0-NEXT: Fast Register Allocator
 ; O0-NEXT: SI Lower WWM Copies
+; O0-NEXT: AMDGPU Reserve WWM Registers
+; O0-NEXT: Fast Register Allocator
 ; O0-NEXT: SI Fix VGPR copies
 
 
@@ -60,6 +67,11 @@
 ; BASIC-DEFAULT-NEXT: Machine Optimization Remark Emitter
 ; BASIC-DEFAULT-NEXT: Greedy Register Allocator
 ; BASIC-DEFAULT-NEXT: SI Lower WWM Copies
+; BASIC-DEFAULT-NEXT: Virtual Register Rewriter
+; BASIC-DEFAULT-NEXT: AMDGPU Reserve WWM Registers
+; BASIC-DEFAULT-NEXT: Virtual Register Map
+; BASIC-DEFAULT-NEXT: Live Register Matrix
+; BASIC-DEFAULT-NEXT: Greedy Register Allocator
 ; BASIC-DEFAULT-NEXT: GCN NSA Reassign
 ; BASIC-DEFAULT-NEXT: Virtual Register Rewriter
 ; BASIC-DEFAULT-NEXT: AMDGPU Mark Last Scratch Load
@@ -75,6 +87,11 @@
 ; DEFAULT-BASIC-NEXT: SI Pre-allocate WWM Registers
 ; DEFAULT-BASIC-NEXT: Basic Register Allocator
 ; DEFAULT-BASIC-NEXT: SI Lower WWM Copies
+; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
+; DEFAULT-BASIC-NEXT: AMDGPU Reserve WWM Registers
+; DEFAULT-BASIC-NEXT: Virtual Register Map
+; DEFAULT-BASIC-NEXT: Live Register Matrix
+; DEFAULT-BASIC-NEXT: Basic Register Allocator
 ; DEFAULT-BASIC-NEXT: GCN NSA Reassign
 ; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
 ; DEFAULT-BASIC-NEXT: AMDGPU Mark Last Scratch Load
@@ -96,6 +113,11 @@
 ; BASIC-BASIC-NEXT: SI Pre-allocate WWM Registers
 ; BASIC-BASIC-NEXT: Basic Register Allocator
 ; BASIC-BASIC-NEXT: SI Lower WWM Copies
+; BASIC-BASIC-NEXT: Virtual Register Rewriter
+; BASIC-BASIC-NEXT: AMDGPU Reserve WWM Registers
+; BASIC-BASIC-NEXT: Virtual Register Map
+; BASIC-BASIC-NEXT: Live Register Matrix
+; BASIC-BASIC-NEXT: Basic Register Allocator
 ; BASIC-BASIC-NEXT: GCN NSA Reassign
 ; BASIC-BASIC-NEXT: Virtual Register Rewriter
 ; BASIC-BASIC-NEXT: AMDGPU Mark Last Scratch Load
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
index a7c841d54f7c9..8308e6ba4355f 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILL %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs --start-before=si-lower-sgpr-spills --stop-after=prologepilog -o - %s | FileCheck -check-prefix=PEI %s
 
@@ -44,28 +45,28 @@ body:             |
   ; SGPR_SPILL: bb.0:
   ; SGPR_SPILL-NEXT:   successors: %bb.1(0x80000000)
   ; SGPR_SPILL-NEXT: {{  $}}
-  ; SGPR_SPILL-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; SGPR_SPILL-NEXT:   renamable $sgpr10 = IMPLICIT_DEF
-  ; SGPR_SPILL-NEXT:   [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[V_WRITELANE_B32_]]
+  ; SGPR_SPILL-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; SGPR_SPILL-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
   ; SGPR_SPILL-NEXT:   DBG_VALUE $noreg, 0
   ; SGPR_SPILL-NEXT: {{  $}}
   ; SGPR_SPILL-NEXT: bb.1:
-  ; SGPR_SPILL-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[V_WRITELANE_B32_]], 0
-  ; SGPR_SPILL-NEXT:   KILL [[V_WRITELANE_B32_]]
+  ; SGPR_SPILL-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
   ; SGPR_SPILL-NEXT:   S_ENDPGM 0
+  ;
   ; PEI-LABEL: name: test
   ; PEI: bb.0:
   ; PEI-NEXT:   successors: %bb.1(0x80000000)
+  ; PEI-NEXT:   liveins: $vgpr0
   ; PEI-NEXT: {{  $}}
-  ; PEI-NEXT:   renamable $vgpr0 = IMPLICIT_DEF
   ; PEI-NEXT:   renamable $sgpr10 = IMPLICIT_DEF
-  ; PEI-NEXT:   renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, killed $vgpr0
+  ; PEI-NEXT:   $vgpr0 = IMPLICIT_DEF
+  ; PEI-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, killed $vgpr0
   ; PEI-NEXT: {{  $}}
   ; PEI-NEXT: bb.1:
   ; PEI-NEXT:   liveins: $vgpr0
   ; PEI-NEXT: {{  $}}
-  ; PEI-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
-  ; PEI-NEXT:   KILL killed renamable $vgpr0
+  ; PEI-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
   ; PEI-NEXT:   S_ENDPGM 0
   bb.0:
     renamable $sgpr10 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
index 29622d3fd0f1b..a91c047093b95 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
 
+; XFAIL: *
+
 ; This tests for a bug that caused a crash in SIRegisterInfo::spillSGPR()
 ; which was due to incorrect book-keeping of removed dead frame indices.
 
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
index d430ba758572d..9a75b6258433a 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
+; XFAIL: *
+
 ; The first 64 SGPR spills can go to a VGPR, but there isn't a second
 ; so some spills must be to memory. The last 16 element spill runs out of lanes at the 15th element.
 
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
index 8a5f75332557e..4b787b846fd39 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -28,181 +28,180 @@ body:             |
   ; GCN-LABEL: name: test_main
   ; GCN: bb.0:
   ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT:   liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $vcc_hi = frame-setup COPY $sgpr33
   ; GCN-NEXT:   $sgpr33 = frame-setup COPY $sgpr32
   ; GCN-NEXT:   $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
   ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr0
   ; GCN-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
-  ; GCN-NEXT:   renamable $vgpr2 = IMPLICIT_DEF
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr27, 23, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr3
-  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr3
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr92, 24, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr4
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr4
-  ; GCN-NEXT:   $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr5
-  ; GCN-NEXT:   $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr5
-  ; GCN-NEXT:   $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr5
-  ; GCN-NEXT:   $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr5
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr27, 23, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr2
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr92, 24, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr3
+  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr4
+  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr4
+  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr4
+  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr4
   ; GCN-NEXT:   $sgpr22 = IMPLICIT_DEF
-  ; GCN-NEXT:   renamable $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr2
+  ; GCN-NEXT:   $vgpr5 = IMPLICIT_DEF
+  ; GCN-NEXT:   $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr5
   ; GCN-NEXT:   dead $vgpr1 = V_SET_INACTIVE_B32 $vgpr0, 0, implicit $exec, implicit-def $scc
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.1:
   ; GCN-NEXT:   successors: %bb.2(0x80000000)
-  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   KILL implicit-def $vcc_lo, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.2:
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
-  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
+  ; GCN-NEXT:   $sgpr22 = SI_RESTORE_S32_FROM_VGPR killed $vgpr5, 0
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.3:
-  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 3
-  ; GCN-NEXT:   $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 2
-  ; GCN-NEXT:   $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 1
-  ; GCN-NEXT:   $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 0
-  ; GCN-NEXT:   $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 31
-  ; GCN-NEXT:   $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 30
-  ; GCN-NEXT:   $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 29
-  ; GCN-NEXT:   $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 28
-  ; GCN-NEXT:   $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 27
-  ; GCN-NEXT:   $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 26
-  ; GCN-NEXT:   $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 25
-  ; GCN-NEXT:   $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 24
-  ; GCN-NEXT:   $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 23
-  ; GCN-NEXT:   $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 22
-  ; GCN-NEXT:   $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 21
-  ; GCN-NEXT:   $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 20
-  ; GCN-NEXT:   $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 19
-  ; GCN-NEXT:   $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 18
-  ; GCN-NEXT:   $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 17
-  ; GCN-NEXT:   $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 16
-  ; GCN-NEXT:   $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 15
-  ; GCN-NEXT:   $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 14
-  ; GCN-NEXT:   $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 13
-  ; GCN-NEXT:   $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 12
-  ; GCN-NEXT:   $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 11
-  ; GCN-NEXT:   $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 10
-  ; GCN-NEXT:   $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 9
-  ; GCN-NEXT:   $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 8
-  ; GCN-NEXT:   $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 7
-  ; GCN-NEXT:   $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 6
-  ; GCN-NEXT:   $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 5
-  ; GCN-NEXT:   $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 4
-  ; GCN-NEXT:   $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 3
-  ; GCN-NEXT:   $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 2
-  ; GCN-NEXT:   $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 1
-  ; GCN-NEXT:   $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 0
-  ; GCN-NEXT:   $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 31
-  ; GCN-NEXT:   $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 30
-  ; GCN-NEXT:   $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 29
-  ; GCN-NEXT:   $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 28
-  ; GCN-NEXT:   $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 27
-  ; GCN-NEXT:   $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 26
-  ; GCN-NEXT:   $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 25
-  ; GCN-NEXT:   $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 24
-  ; GCN-NEXT:   $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 23
-  ; GCN-NEXT:   $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 22
-  ; GCN-NEXT:   $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 21
-  ; GCN-NEXT:   $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 20
-  ; GCN-NEXT:   $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 19
-  ; GCN-NEXT:   $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 18
-  ; GCN-NEXT:   $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 17
-  ; GCN-NEXT:   $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 16
-  ; GCN-NEXT:   $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 15
-  ; GCN-NEXT:   $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 14
-  ; GCN-NEXT:   $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 13
-  ; GCN-NEXT:   $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 12
-  ; GCN-NEXT:   $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 11
-  ; GCN-NEXT:   $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 10
-  ; GCN-NEXT:   $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 9
-  ; GCN-NEXT:   $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 8
-  ; GCN-NEXT:   $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 7
-  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 6
-  ; GCN-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 5
-  ; GCN-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4
-  ; GCN-NEXT:   $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3
-  ; GCN-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2
-  ; GCN-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1
-  ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0
-  ; GCN-NEXT:   KILL killed renamable $vgpr2
+  ; GCN-NEXT:   $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 3
+  ; GCN-NEXT:   $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 2
+  ; GCN-NEXT:   $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 1
+  ; GCN-NEXT:   $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 0
+  ; GCN-NEXT:   $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 31
+  ; GCN-NEXT:   $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 30
+  ; GCN-NEXT:   $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 29
+  ; GCN-NEXT:   $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 28
+  ; GCN-NEXT:   $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 27
+  ; GCN-NEXT:   $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 26
+  ; GCN-NEXT:   $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 25
+  ; GCN-NEXT:   $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 24
+  ; GCN-NEXT:   $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 23
+  ; GCN-NEXT:   $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 22
+  ; GCN-NEXT:   $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 21
+  ; GCN-NEXT:   $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 20
+  ; GCN-NEXT:   $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 19
+  ; GCN-NEXT:   $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 18
+  ; GCN-NEXT:   $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 17
+  ; GCN-NEXT:   $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 16
+  ; GCN-NEXT:   $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 15
+  ; GCN-NEXT:   $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 14
+  ; GCN-NEXT:   $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 13
+  ; GCN-NEXT:   $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 12
+  ; GCN-NEXT:   $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 11
+  ; GCN-NEXT:   $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 10
+  ; GCN-NEXT:   $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 9
+  ; GCN-NEXT:   $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 8
+  ; GCN-NEXT:   $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 7
+  ; GCN-NEXT:   $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 6
+  ; GCN-NEXT:   $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 5
+  ; GCN-NEXT:   $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4
+  ; GCN-NEXT:   $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3
+  ; GCN-NEXT:   $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2
+  ; GCN-NEXT:   $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1
+  ; GCN-NEXT:   $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0
+  ; GCN-NEXT:   $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 31
+  ; GCN-NEXT:   $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 30
+  ; GCN-NEXT:   $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 29
+  ; GCN-NEXT:   $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 28
+  ; GCN-NEXT:   $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 27
+  ; GCN-NEXT:   $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 26
+  ; GCN-NEXT:   $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 25
+  ; GCN-NEXT:   $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 24
+  ; GCN-NEXT:   $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 23
+  ; GCN-NEXT:   $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 22
+  ; GCN-NEXT:   $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 21
+  ; GCN-NEXT:   $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 20
+  ; GCN-NEXT:   $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 19
+  ; GCN-NEXT:   $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 18
+  ; GCN-NEXT:   $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 17
+  ; GCN-NEXT:   $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 16
+  ; GCN-NEXT:   $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 15
+  ; GCN-NEXT:   $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 14
+  ; GCN-NEXT:   $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 13
+  ; GCN-NEXT:   $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 12
+  ; GCN-NEXT:   $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 11
+  ; GCN-NEXT:   $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 10
+  ; GCN-NEXT:   $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 9
+  ; GCN-NEXT:   $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 8
+  ; GCN-NEXT:   $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 7
+  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 6
+  ; GCN-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 5
+  ; GCN-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 4
+  ; GCN-NEXT:   $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 3
+  ; GCN-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 2
+  ; GCN-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
+  ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
   ; GCN-NEXT:   $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
-  ; GCN-NEXT:   $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
-  ; GCN-NEXT:   $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
-  ; GCN-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
-  ; GCN-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
+  ; GCN-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
+  ; GCN-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
+  ; GCN-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
+  ; GCN-NEXT:   $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
+  ; GCN-NEXT:   $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
   ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr0
   ; GCN-NEXT:   $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24, implicit-def dead $scc
   ; GCN-NEXT:   $sgpr33 = frame-destroy COPY $vcc_hi
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir
index 0c6c0069911f0..bed7c0c12b7cb 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir
@@ -18,11 +18,9 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $sgpr0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
-  ; CHECK-NEXT:   KILL [[DEF]]
   ; CHECK-NEXT:   S_ENDPGM 0
   bb.0:
     liveins: $sgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
index f523b4a2495f1..676895591c1b4 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
@@ -19,7 +19,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
 ; GCN-NEXT:    s_mov_b32 s18, s33
 ; GCN-NEXT:    s_mov_b32 s33, s32
 ; GCN-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; GCN-NEXT:    buffer_store_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v255, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[16:17]
 ; GCN-NEXT:    s_add_i32 s32, s32, 0x7400
 ; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill
@@ -135,13 +135,13 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
 ; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GCN-NEXT:    v_writelane_b32 v255, s30, 0
 ; GCN-NEXT:    v_writelane_b32 v255, s31, 1
-; GCN-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:444
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_getpc_b64 s[16:17]
 ; GCN-NEXT:    s_add_u32 s16, s16, child_function@gotpcrel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12
@@ -266,7 +266,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
 ; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v255, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    s_add_i32 s32, s32, 0xffff8c00
 ; GCN-NEXT:    s_mov_b32 s33, s18
@@ -313,7 +313,7 @@ define void @spill_to_lowest_available_vgpr() #0 {
 ; GCN-NEXT:    s_mov_b32 s18, s33
 ; GCN-NEXT:    s_mov_b32 s33, s32
 ; GCN-NEXT:    s_or_saveexec_b64 s[16:17], -1
-; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[16:17]
 ; GCN-NEXT:    s_add_i32 s32, s32, 0x7400
 ; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
@@ -428,13 +428,13 @@ define void @spill_to_lowest_available_vgpr() #0 {
 ; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GCN-NEXT:    v_writelane_b32 v254, s30, 0
 ; GCN-NEXT:    v_writelane_b32 v254, s31, 1
-; GCN-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:440
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_getpc_b64 s[16:17]
 ; GCN-NEXT:    s_add_u32 s16, s16, child_function@gotpcrel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12
@@ -558,7 +558,7 @@ define void @spill_to_lowest_available_vgpr() #0 {
 ; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    s_add_i32 s32, s32, 0xffff8c00
 ; GCN-NEXT:    s_mov_b32 s33, s18
@@ -602,8 +602,8 @@ define void @spill_sgpr_with_sgpr_uses() #0 {
 ; GCN-LABEL: spill_sgpr_with_sgpr_uses:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
@@ -715,38 +715,30 @@ define void @spill_sgpr_with_sgpr_uses() #0 {
 ; GCN-NEXT:    buffer_store_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v253, off, s[0:3], s32 ; 4-byte Folded Spill
-; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:440
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s4
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_writelane_b32 v0, s4, 0
+; GCN-NEXT:    ; implicit-def: $vgpr254 : SGPR spill to VGPR lane
+; GCN-NEXT:    v_writelane_b32 v254, s4, 0
 ; GCN-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-NEXT:    s_cbranch_scc1 .LBB3_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
 ; GCN-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[8:9]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_readlane_b32 s4, v0, 0
+; GCN-NEXT:    v_readlane_b32 s4, v254, 0
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s4
 ; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:  .LBB3_2: ; %ret
-; GCN-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[8:9]
-; GCN-NEXT:    ; kill: killed $vgpr0
 ; GCN-NEXT:    buffer_load_dword v253, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
@@ -857,8 +849,8 @@ define void @spill_sgpr_with_sgpr_uses() #0 {
 ; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
-; GCN-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_load_dword v254, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -1183,7 +1175,7 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
@@ -1315,17 +1307,17 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in
 ; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GCN-NEXT:    flat_load_dwordx4 v[6:9], v[2:3]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -1447,7 +1439,7 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in
 ; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
index c5a5a5209f54f..afee1caa7ab4b 100644
--- a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s
 
+; XFAIL: *
+
 ; Make sure this doesn't crash.
 ; ALL-LABEL: {{^}}test:
 ; ALL: s_mov_b32 s[[LO:[0-9]+]], SCRATCH_RSRC_DWORD0
diff --git a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
index f8ec6bb5d943f..080bd052a7391 100644
--- a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
@@ -35,7 +35,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $sgpr34_sgpr35 = IMPLICIT_DEF
-  ; CHECK-NEXT:   dead renamable $vgpr0 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   renamable $sgpr41 = IMPLICIT_DEF
   ; CHECK-NEXT:   renamable $sgpr38_sgpr39 = COPY undef $sgpr8_sgpr9
   ; CHECK-NEXT:   renamable $sgpr36_sgpr37 = IMPLICIT_DEF
@@ -79,9 +79,9 @@ body:             |
   ; CHECK-NEXT:   liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr38_sgpr39, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4)
-  ; CHECK-NEXT:   GLOBAL_STORE_DWORD_SADDR undef renamable $vgpr0, undef renamable $vgpr0, killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1)
-  ; CHECK-NEXT:   GLOBAL_STORE_DWORD_SADDR undef renamable $vgpr0, undef renamable $vgpr0, renamable $sgpr50_sgpr51, 0, 0, implicit $exec :: (store (s32), addrspace 1)
-  ; CHECK-NEXT:   dead renamable $vgpr0 = COPY killed renamable $sgpr49
+  ; CHECK-NEXT:   GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+  ; CHECK-NEXT:   GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr50_sgpr51, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+  ; CHECK-NEXT:   dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr49
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
   ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY killed renamable $sgpr36_sgpr37
   ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY killed renamable $sgpr34_sgpr35
diff --git a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
index b045dd559aac2..03988c3994992 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
@@ -2,22 +2,20 @@
 
 ; GCN-LABEL: {{^}}spill_csr_s5_copy:
 ; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; GCN: s_xor_saveexec_b64
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, -1
+; GCN: s_or_saveexec_b64
 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec
-; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4
+; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 2
 ; GCN: s_swappc_b64
 
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 9
 ; GCN: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
 
-; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4
-; GCN: s_xor_saveexec_b64
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, -1
+; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 2
+; GCN: s_or_saveexec_b64
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; GCN: s_mov_b64 exec
 ; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
 ; GCN: s_setpc_b64
diff --git a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
index e54e5898f8b53..40be0c6b67ee9 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
@@ -22,14 +22,11 @@ body:             |
   ; CHECK-NEXT:   undef [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
   ; CHECK-NEXT:   dead [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e32 0, [[V_MOV_B32_e32_]].sub1, implicit $mode, implicit $exec
   ; CHECK-NEXT:   undef [[V_MAC_F32_e32_:%[0-9]+]].sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef [[V_MAC_F32_e32_]].sub1, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   SI_SPILL_V64_SAVE [[V_MAC_F32_e32_]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   undef [[V_MOV_B32_e32_1:%[0-9]+]].sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
-  ; CHECK-NEXT:   S_NOP 0, implicit [[V_MOV_B32_e32_1]].sub1
-  ; CHECK-NEXT:   [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
-  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V64_RESTORE]].sub1
-  ; CHECK-NEXT:   S_NOP 0, implicit undef %9.sub0:vreg_64
+  ; CHECK-NEXT:   S_NOP 0, implicit [[V_MOV_B32_e32_]].sub1
+  ; CHECK-NEXT:   S_NOP 0, implicit [[V_MAC_F32_e32_]].sub1
+  ; CHECK-NEXT:   S_NOP 0, implicit undef [[V_MAC_F32_e32_]].sub0
   bb.0:
     successors: %bb.1
 
@@ -59,13 +56,13 @@ body:             |
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   undef [[V_MOV_B32_e32_:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec
+  ; CHECK-NEXT:   undef [[V_MOV_B32_e32_:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   undef [[V_MOV_B32_e32_1:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   S_NOP 0, implicit [[V_MOV_B32_e32_]].sub2
-  ; CHECK-NEXT:   S_NOP 0, implicit undef %4.sub0:vreg_128
-  ; CHECK-NEXT:   undef [[V_MOV_B32_e32_1:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
   ; CHECK-NEXT:   S_NOP 0, implicit [[V_MOV_B32_e32_1]].sub2
+  ; CHECK-NEXT:   S_NOP 0, implicit undef [[V_MOV_B32_e32_]].sub0
+  ; CHECK-NEXT:   S_NOP 0, implicit [[V_MOV_B32_e32_]].sub2
   bb.0:
     successors: %bb.1
 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
index 05e1082de4478..f4edafd9443ab 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
@@ -25,14 +25,13 @@ body:             |
     ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
-    ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
     ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
+    ; GCN-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
     ; GCN-NEXT: renamable $sgpr8 = COPY renamable $sgpr1
-    ; GCN-NEXT: KILL killed renamable $vgpr0
     ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
@@ -64,13 +63,12 @@ body:             |
     ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
-    ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
     ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
-    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GCN-NEXT: KILL killed renamable $vgpr0
+    ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0
+    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
+    ; GCN-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
     ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
index 11babc82e919b..dff2bd7f7aef9 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
@@ -21,8 +21,8 @@ body:             |
     ; GCN-LABEL: name: sgpr32_spill
     ; GCN: liveins: $sgpr30_sgpr31, $sgpr10
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
     ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
     ; GCN-NEXT: S_SETPC_B64 $sgpr30_sgpr31
@@ -55,7 +55,6 @@ body:             |
     ; GCN-LABEL: name: sgpr_spill_lane_crossover
     ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr64, 0, $vgpr63
     ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr65, 1, $vgpr63
     ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr66, 2, $vgpr63
@@ -89,6 +88,7 @@ body:             |
     ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 30, $vgpr63
     ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 31, $vgpr63
     ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
     ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr64, 1, [[DEF]], implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr65, 2, [[DEF]]
@@ -187,9 +187,9 @@ body:             |
   ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; GCN-NEXT:   S_NOP 0
   ; GCN-NEXT:   S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.1:
@@ -256,7 +256,6 @@ body:             |
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; GCN-NEXT:   S_NOP 0
   ; GCN-NEXT:   S_BRANCH %bb.3
   ; GCN-NEXT: {{  $}}
@@ -264,7 +263,7 @@ body:             |
   ; GCN-NEXT:   successors: %bb.2(0x80000000)
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR %0, 0
   ; GCN-NEXT:   $sgpr10 = S_ADD_I32 $sgpr10, 15, implicit-def dead $scc
   ; GCN-NEXT:   S_BRANCH %bb.2
   ; GCN-NEXT: {{  $}}
@@ -272,7 +271,7 @@ body:             |
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR %0, 0
   ; GCN-NEXT:   $sgpr10 = S_ADD_I32 $sgpr10, 20, implicit-def dead $scc
   ; GCN-NEXT:   S_BRANCH %bb.3
   ; GCN-NEXT: {{  $}}
@@ -281,6 +280,7 @@ body:             |
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr10 = S_MOV_B32 10
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
   ; GCN-NEXT:   S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
   ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
index 2caaabde38e9d..fa82a4efd89f4 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
@@ -19,12 +19,8 @@ body:             |
   bb.0:
     liveins: $sgpr30_sgpr31, $vgpr0
     ; GCN-LABEL: name: shift_back_exec_copy_reserved_reg
-    ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    ; GCN: liveins: $vgpr0, $sgpr30_sgpr31
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0
     ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0
     ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
@@ -60,12 +56,10 @@ body:             |
   bb.0:
     liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr30_sgpr31, $vcc, $vgpr0
     ; GCN-LABEL: name: spill_exec_copy_reserved_reg
-    ; GCN: liveins: $vcc, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+    ; GCN: liveins: $vcc, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $vgpr0, $vgpr2, $sgpr30_sgpr31
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr28_sgpr29 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr28_sgpr29
     ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 0, undef $vgpr2
     ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr35, 1, undef $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
index d5f97314f9324..be3e0b4cbd63f 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
@@ -12,12 +12,10 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 {
   ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
   ; GCN-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
   ; GCN-NEXT:   [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
-  ; GCN-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def undef %24.sub0
-  ; GCN-NEXT:   SI_SPILL_V64_SAVE %24, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
+  ; GCN-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def undef %6.sub0
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
   ; GCN-NEXT:   GLOBAL_STORE_DWORDX4 undef %16:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
-  ; GCN-NEXT:   [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
-  ; GCN-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3538953 /* reguse:VReg_64 */, [[SI_SPILL_V64_RESTORE]]
+  ; GCN-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3538953 /* reguse:VReg_64 */, %6
   ; GCN-NEXT:   S_ENDPGM 0
   %v0 = call i32 asm sideeffect "; def $0", "=v"()
   %tmp = insertelement <2 x i32> undef, i32 %v0, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
index 81dd2c4457b2f..4384d1e32cf53 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
@@ -11,9 +11,8 @@ define void @test() {
 ; CHECK:       ; %bb.0: ; %bb.0
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b64 exec, s[4:5]
-; CHECK-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; CHECK-NEXT:  .LBB0_1: ; %bb.1
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB0_3
@@ -21,42 +20,40 @@ define void @test() {
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:  .LBB0_3: ; %bb.3
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_or_saveexec_b64 s[10:11], -1
-; CHECK-NEXT:    v_accvgpr_read_b32 v0, a0 ; Reload Reuse
-; CHECK-NEXT:    s_mov_b64 exec, s[10:11]
 ; CHECK-NEXT:    ; implicit-def: $sgpr4
-; CHECK-NEXT:    v_mov_b32_e32 v1, s4
-; CHECK-NEXT:    v_readfirstlane_b32 s6, v1
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    v_readfirstlane_b32 s6, v0
 ; CHECK-NEXT:    s_mov_b64 s[4:5], -1
 ; CHECK-NEXT:    s_mov_b32 s7, 0
 ; CHECK-NEXT:    s_cmp_eq_u32 s6, s7
-; CHECK-NEXT:    v_writelane_b32 v0, s4, 0
-; CHECK-NEXT:    v_writelane_b32 v0, s5, 1
+; CHECK-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; CHECK-NEXT:    v_writelane_b32 v1, s4, 0
+; CHECK-NEXT:    v_writelane_b32 v1, s5, 1
 ; CHECK-NEXT:    s_mov_b64 s[10:11], exec
 ; CHECK-NEXT:    s_mov_b64 exec, -1
-; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0 ; Reload Reuse
+; CHECK-NEXT:    v_accvgpr_write_b32 a0, v1 ; Reload Reuse
 ; CHECK-NEXT:    s_mov_b64 exec, s[10:11]
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB0_5
 ; CHECK-NEXT:  ; %bb.4: ; %bb.4
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    s_or_saveexec_b64 s[10:11], -1
-; CHECK-NEXT:    v_accvgpr_read_b32 v0, a0 ; Reload Reuse
+; CHECK-NEXT:    v_accvgpr_read_b32 v1, a0 ; Reload Reuse
 ; CHECK-NEXT:    s_mov_b64 exec, s[10:11]
 ; CHECK-NEXT:    s_mov_b64 s[4:5], 0
-; CHECK-NEXT:    v_writelane_b32 v0, s4, 0
-; CHECK-NEXT:    v_writelane_b32 v0, s5, 1
+; CHECK-NEXT:    v_writelane_b32 v1, s4, 0
+; CHECK-NEXT:    v_writelane_b32 v1, s5, 1
 ; CHECK-NEXT:    s_or_saveexec_b64 s[10:11], -1
 ; CHECK-NEXT:    s_nop 0
-; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0 ; Reload Reuse
+; CHECK-NEXT:    v_accvgpr_write_b32 a0, v1 ; Reload Reuse
 ; CHECK-NEXT:    s_mov_b64 exec, s[10:11]
 ; CHECK-NEXT:  .LBB0_5: ; %Flow
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    s_or_saveexec_b64 s[10:11], -1
 ; CHECK-NEXT:    s_nop 0
-; CHECK-NEXT:    v_accvgpr_read_b32 v0, a0 ; Reload Reuse
+; CHECK-NEXT:    v_accvgpr_read_b32 v1, a0 ; Reload Reuse
 ; CHECK-NEXT:    s_mov_b64 exec, s[10:11]
-; CHECK-NEXT:    v_readlane_b32 s4, v0, 0
-; CHECK-NEXT:    v_readlane_b32 s5, v0, 1
+; CHECK-NEXT:    v_readlane_b32 s4, v1, 0
+; CHECK-NEXT:    v_readlane_b32 s5, v1, 1
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; CHECK-NEXT:    s_mov_b32 s4, 1
 ; CHECK-NEXT:    ; implicit-def: $sgpr5
@@ -64,12 +61,8 @@ define void @test() {
 ; CHECK-NEXT:    s_and_b64 vcc, exec, s[4:5]
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB0_1
 ; CHECK-NEXT:  ; %bb.6: ; %bb.5
-; CHECK-NEXT:    s_or_saveexec_b64 s[10:11], -1
-; CHECK-NEXT:    v_accvgpr_read_b32 v0, a0 ; Reload Reuse
-; CHECK-NEXT:    s_mov_b64 exec, s[10:11]
-; CHECK-NEXT:    ; kill: killed $vgpr0
 ; CHECK-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b64 exec, s[4:5]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir
index 56882cd83106e..e6b15e4b51f09 100644
--- a/llvm/test/CodeGen/AMDGPU/spill192.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill192.mir
@@ -34,8 +34,8 @@ body: |
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
-  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill224.mir b/llvm/test/CodeGen/AMDGPU/spill224.mir
index 4525fc1cb70e6..aa05a83ab070e 100644
--- a/llvm/test/CodeGen/AMDGPU/spill224.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill224.mir
@@ -32,8 +32,8 @@ body: |
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
-  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill288.mir b/llvm/test/CodeGen/AMDGPU/spill288.mir
index 173056e6b9132..2c33a3f530264 100644
--- a/llvm/test/CodeGen/AMDGPU/spill288.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill288.mir
@@ -32,8 +32,8 @@ body: |
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
-  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill320.mir b/llvm/test/CodeGen/AMDGPU/spill320.mir
index 828f524bb0677..32ffcf05201b4 100644
--- a/llvm/test/CodeGen/AMDGPU/spill320.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill320.mir
@@ -32,8 +32,8 @@ body: |
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
-  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill352.mir b/llvm/test/CodeGen/AMDGPU/spill352.mir
index ef620fae5b104..32d3d411140da 100644
--- a/llvm/test/CodeGen/AMDGPU/spill352.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill352.mir
@@ -32,8 +32,8 @@ body: |
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
-  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill384.mir b/llvm/test/CodeGen/AMDGPU/spill384.mir
index 37cf5d0fc83cf..aedfdcaebfbc0 100644
--- a/llvm/test/CodeGen/AMDGPU/spill384.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill384.mir
@@ -32,8 +32,8 @@ body: |
   ; EXPANDED: bb.0:
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
-  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+  ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
   ; EXPANDED-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
index c6a599094fe43..28fbacdc67ba9 100644
--- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
+++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
@@ -236,20 +236,15 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
 ; WAVE32-O0:       ; %bb.0: ; %bb0
 ; WAVE32-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; WAVE32-O0-NEXT:    s_xor_saveexec_b32 s4, -1
-; WAVE32-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; WAVE32-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s4
-; WAVE32-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; WAVE32-O0-NEXT:    v_mov_b32_e32 v1, v0
-; WAVE32-O0-NEXT:    s_or_saveexec_b32 s7, -1
-; WAVE32-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s7
-; WAVE32-O0-NEXT:    v_and_b32_e64 v1, 1, v1
-; WAVE32-O0-NEXT:    v_cmp_eq_u32_e64 s5, v1, 1
+; WAVE32-O0-NEXT:    v_and_b32_e64 v0, 1, v0
+; WAVE32-O0-NEXT:    v_cmp_eq_u32_e64 s5, v0, 1
 ; WAVE32-O0-NEXT:    s_mov_b32 s4, exec_lo
-; WAVE32-O0-NEXT:    s_waitcnt vmcnt(0)
-; WAVE32-O0-NEXT:    v_writelane_b32 v0, s4, 0
+; WAVE32-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; WAVE32-O0-NEXT:    v_writelane_b32 v1, s4, 0
 ; WAVE32-O0-NEXT:    s_or_saveexec_b32 s7, -1
-; WAVE32-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; WAVE32-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
 ; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s7
 ; WAVE32-O0-NEXT:    s_and_b32 s4, s4, s5
 ; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s4
@@ -262,14 +257,13 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
 ; WAVE32-O0-NEXT:    ;;#ASMEND
 ; WAVE32-O0-NEXT:  .LBB4_2: ; %bb2
 ; WAVE32-O0-NEXT:    s_or_saveexec_b32 s7, -1
-; WAVE32-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; WAVE32-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s7
 ; WAVE32-O0-NEXT:    s_waitcnt vmcnt(0)
-; WAVE32-O0-NEXT:    v_readlane_b32 s4, v0, 0
+; WAVE32-O0-NEXT:    v_readlane_b32 s4, v1, 0
 ; WAVE32-O0-NEXT:    s_or_b32 exec_lo, exec_lo, s4
-; WAVE32-O0-NEXT:    ; kill: killed $vgpr0
 ; WAVE32-O0-NEXT:    s_xor_saveexec_b32 s4, -1
-; WAVE32-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; WAVE32-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s4
 ; WAVE32-O0-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE32-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -278,21 +272,16 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
 ; WAVE64-O0:       ; %bb.0: ; %bb0
 ; WAVE64-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; WAVE64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; WAVE64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; WAVE64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; WAVE64-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; WAVE64-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; WAVE64-O0-NEXT:    v_mov_b32_e32 v1, v0
-; WAVE64-O0-NEXT:    s_or_saveexec_b64 s[10:11], -1
-; WAVE64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; WAVE64-O0-NEXT:    s_mov_b64 exec, s[10:11]
-; WAVE64-O0-NEXT:    v_and_b32_e64 v1, 1, v1
-; WAVE64-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v1, 1
+; WAVE64-O0-NEXT:    v_and_b32_e64 v0, 1, v0
+; WAVE64-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v0, 1
 ; WAVE64-O0-NEXT:    s_mov_b64 s[4:5], exec
-; WAVE64-O0-NEXT:    s_waitcnt vmcnt(0)
-; WAVE64-O0-NEXT:    v_writelane_b32 v0, s4, 0
-; WAVE64-O0-NEXT:    v_writelane_b32 v0, s5, 1
+; WAVE64-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; WAVE64-O0-NEXT:    v_writelane_b32 v1, s4, 0
+; WAVE64-O0-NEXT:    v_writelane_b32 v1, s5, 1
 ; WAVE64-O0-NEXT:    s_or_saveexec_b64 s[10:11], -1
-; WAVE64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; WAVE64-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
 ; WAVE64-O0-NEXT:    s_mov_b64 exec, s[10:11]
 ; WAVE64-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; WAVE64-O0-NEXT:    s_mov_b64 exec, s[4:5]
@@ -305,15 +294,14 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
 ; WAVE64-O0-NEXT:    ;;#ASMEND
 ; WAVE64-O0-NEXT:  .LBB4_2: ; %bb2
 ; WAVE64-O0-NEXT:    s_or_saveexec_b64 s[10:11], -1
-; WAVE64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; WAVE64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; WAVE64-O0-NEXT:    s_mov_b64 exec, s[10:11]
 ; WAVE64-O0-NEXT:    s_waitcnt vmcnt(0)
-; WAVE64-O0-NEXT:    v_readlane_b32 s4, v0, 0
-; WAVE64-O0-NEXT:    v_readlane_b32 s5, v0, 1
+; WAVE64-O0-NEXT:    v_readlane_b32 s4, v1, 0
+; WAVE64-O0-NEXT:    v_readlane_b32 s5, v1, 1
 ; WAVE64-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
-; WAVE64-O0-NEXT:    ; kill: killed $vgpr0
 ; WAVE64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; WAVE64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; WAVE64-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; WAVE64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -324,10 +312,10 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
 ; WAVE32-WWM-PREALLOC-NEXT:    s_xor_saveexec_b32 s4, -1
 ; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 exec_lo, s4
-; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; WAVE32-WWM-PREALLOC-NEXT:    v_and_b32_e64 v0, 1, v0
 ; WAVE32-WWM-PREALLOC-NEXT:    v_cmp_eq_u32_e64 s5, v0, 1
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s4, exec_lo
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v1, s4, 0
 ; WAVE32-WWM-PREALLOC-NEXT:    s_and_b32 s4, s4, s5
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 exec_lo, s4
@@ -341,7 +329,6 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
 ; WAVE32-WWM-PREALLOC-NEXT:  .LBB4_2: ; %bb2
 ; WAVE32-WWM-PREALLOC-NEXT:    v_readlane_b32 s4, v1, 0
 ; WAVE32-WWM-PREALLOC-NEXT:    s_or_b32 exec_lo, exec_lo, s4
-; WAVE32-WWM-PREALLOC-NEXT:    ; kill: killed $vgpr1
 ; WAVE32-WWM-PREALLOC-NEXT:    s_xor_saveexec_b32 s4, -1
 ; WAVE32-WWM-PREALLOC-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 exec_lo, s4
@@ -923,7 +910,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
 ; WAVE32-O0-NEXT:    s_bitset0_b32 s23, 21
 ; WAVE32-O0-NEXT:    s_add_u32 s20, s20, s9
 ; WAVE32-O0-NEXT:    s_addc_u32 s21, s21, 0
-; WAVE32-O0-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; WAVE32-O0-NEXT:    s_mov_b32 s14, s8
 ; WAVE32-O0-NEXT:    s_mov_b32 s13, s7
 ; WAVE32-O0-NEXT:    s_mov_b32 s12, s6
@@ -931,12 +917,10 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
 ; WAVE32-O0-NEXT:    s_mov_b64 s[8:9], s[2:3]
 ; WAVE32-O0-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; WAVE32-O0-NEXT:    s_mov_b32 s0, s32
-; WAVE32-O0-NEXT:    v_writelane_b32 v3, s0, 0
+; WAVE32-O0-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; WAVE32-O0-NEXT:    v_writelane_b32 v32, s0, 0
 ; WAVE32-O0-NEXT:    s_lshr_b32 s0, s0, 5
-; WAVE32-O0-NEXT:    v_writelane_b32 v3, s0, 1
-; WAVE32-O0-NEXT:    s_or_saveexec_b32 s19, -1
-; WAVE32-O0-NEXT:    buffer_store_dword v3, off, s[20:23], 0 offset:128 ; 4-byte Folded Spill
-; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s19
+; WAVE32-O0-NEXT:    v_writelane_b32 v32, s0, 1
 ; WAVE32-O0-NEXT:    v_mov_b32_e32 v3, 42
 ; WAVE32-O0-NEXT:    buffer_store_dword v3, off, s[20:23], 0
 ; WAVE32-O0-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -1019,17 +1003,12 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
 ; WAVE32-O0-NEXT:    ; implicit-def: $sgpr18
 ; WAVE32-O0-NEXT:    v_mov_b32_e32 v30, s18
 ; WAVE32-O0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; WAVE32-O0-NEXT:    s_or_saveexec_b32 s19, -1
-; WAVE32-O0-NEXT:    buffer_load_dword v0, off, s[20:23], 0 offset:128 ; 4-byte Folded Reload
-; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s19
-; WAVE32-O0-NEXT:    s_waitcnt vmcnt(0)
-; WAVE32-O0-NEXT:    v_readlane_b32 s1, v0, 1
-; WAVE32-O0-NEXT:    v_readlane_b32 s0, v0, 0
+; WAVE32-O0-NEXT:    v_readlane_b32 s1, v32, 1
+; WAVE32-O0-NEXT:    v_readlane_b32 s0, v32, 0
 ; WAVE32-O0-NEXT:    ;;#ASMSTART
 ; WAVE32-O0-NEXT:    ; use s1
 ; WAVE32-O0-NEXT:    ;;#ASMEND
 ; WAVE32-O0-NEXT:    s_mov_b32 s32, s0
-; WAVE32-O0-NEXT:    ; kill: killed $vgpr0
 ; WAVE32-O0-NEXT:    s_endpgm
 ;
 ; WAVE64-O0-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
@@ -1041,7 +1020,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
 ; WAVE64-O0-NEXT:    s_waitcnt lgkmcnt(0)
 ; WAVE64-O0-NEXT:    s_add_u32 s24, s24, s9
 ; WAVE64-O0-NEXT:    s_addc_u32 s25, s25, 0
-; WAVE64-O0-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; WAVE64-O0-NEXT:    s_mov_b32 s14, s8
 ; WAVE64-O0-NEXT:    s_mov_b32 s13, s7
 ; WAVE64-O0-NEXT:    s_mov_b32 s12, s6
@@ -1049,12 +1027,10 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
 ; WAVE64-O0-NEXT:    s_mov_b64 s[8:9], s[2:3]
 ; WAVE64-O0-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; WAVE64-O0-NEXT:    s_mov_b32 s0, s32
-; WAVE64-O0-NEXT:    v_writelane_b32 v3, s0, 0
+; WAVE64-O0-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; WAVE64-O0-NEXT:    v_writelane_b32 v32, s0, 0
 ; WAVE64-O0-NEXT:    s_lshr_b32 s0, s0, 6
-; WAVE64-O0-NEXT:    v_writelane_b32 v3, s0, 1
-; WAVE64-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; WAVE64-O0-NEXT:    buffer_store_dword v3, off, s[24:27], 0 offset:128 ; 4-byte Folded Spill
-; WAVE64-O0-NEXT:    s_mov_b64 exec, s[20:21]
+; WAVE64-O0-NEXT:    v_writelane_b32 v32, s0, 1
 ; WAVE64-O0-NEXT:    v_mov_b32_e32 v3, 42
 ; WAVE64-O0-NEXT:    buffer_store_dword v3, off, s[24:27], 0
 ; WAVE64-O0-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -1137,17 +1113,12 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
 ; WAVE64-O0-NEXT:    ; implicit-def: $sgpr18
 ; WAVE64-O0-NEXT:    v_mov_b32_e32 v30, s18
 ; WAVE64-O0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; WAVE64-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; WAVE64-O0-NEXT:    buffer_load_dword v0, off, s[24:27], 0 offset:128 ; 4-byte Folded Reload
-; WAVE64-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; WAVE64-O0-NEXT:    s_waitcnt vmcnt(0)
-; WAVE64-O0-NEXT:    v_readlane_b32 s1, v0, 1
-; WAVE64-O0-NEXT:    v_readlane_b32 s0, v0, 0
+; WAVE64-O0-NEXT:    v_readlane_b32 s1, v32, 1
+; WAVE64-O0-NEXT:    v_readlane_b32 s0, v32, 0
 ; WAVE64-O0-NEXT:    ;;#ASMSTART
 ; WAVE64-O0-NEXT:    ; use s1
 ; WAVE64-O0-NEXT:    ;;#ASMEND
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s0
-; WAVE64-O0-NEXT:    ; kill: killed $vgpr0
 ; WAVE64-O0-NEXT:    s_endpgm
 ;
 ; WAVE32-WWM-PREALLOC-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
@@ -1160,7 +1131,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
 ; WAVE32-WWM-PREALLOC-NEXT:    s_bitset0_b32 s23, 21
 ; WAVE32-WWM-PREALLOC-NEXT:    s_add_u32 s20, s20, s9
 ; WAVE32-WWM-PREALLOC-NEXT:    s_addc_u32 s21, s21, 0
-; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s14, s8
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s13, s7
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s12, s6
@@ -1168,6 +1138,7 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[8:9], s[2:3]
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s0, s32
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
 ; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v32, s0, 0
 ; WAVE32-WWM-PREALLOC-NEXT:    s_lshr_b32 s0, s0, 5
 ; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v32, s0, 1
@@ -1259,7 +1230,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
 ; WAVE32-WWM-PREALLOC-NEXT:    ; use s1
 ; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMEND
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s0
-; WAVE32-WWM-PREALLOC-NEXT:    ; kill: killed $vgpr32
 ; WAVE32-WWM-PREALLOC-NEXT:    s_endpgm
   %alloca = alloca [32 x i32], addrspace(5)
   %stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
@@ -1344,23 +1314,20 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
 ; WAVE32-O0-LABEL: func_stacksave_stackrestore_call_with_stack_objects:
 ; WAVE32-O0:       ; %bb.0:
 ; WAVE32-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-O0-NEXT:    s_mov_b32 s25, s33
+; WAVE32-O0-NEXT:    s_mov_b32 s24, s33
 ; WAVE32-O0-NEXT:    s_mov_b32 s33, s32
 ; WAVE32-O0-NEXT:    s_xor_saveexec_b32 s16, -1
 ; WAVE32-O0-NEXT:    buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
-; WAVE32-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; WAVE32-O0-NEXT:    buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
 ; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s16
 ; WAVE32-O0-NEXT:    s_add_i32 s32, s32, 0x1200
-; WAVE32-O0-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; WAVE32-O0-NEXT:    v_writelane_b32 v32, s30, 0
 ; WAVE32-O0-NEXT:    v_writelane_b32 v32, s31, 1
 ; WAVE32-O0-NEXT:    s_mov_b32 s16, s32
-; WAVE32-O0-NEXT:    v_writelane_b32 v0, s16, 0
+; WAVE32-O0-NEXT:    ; implicit-def: $vgpr33 : SGPR spill to VGPR lane
+; WAVE32-O0-NEXT:    v_writelane_b32 v33, s16, 0
 ; WAVE32-O0-NEXT:    s_lshr_b32 s16, s16, 5
-; WAVE32-O0-NEXT:    v_writelane_b32 v0, s16, 1
-; WAVE32-O0-NEXT:    s_or_saveexec_b32 s24, -1
-; WAVE32-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
-; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s24
+; WAVE32-O0-NEXT:    v_writelane_b32 v33, s16, 1
 ; WAVE32-O0-NEXT:    v_mov_b32_e32 v0, 42
 ; WAVE32-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s33
 ; WAVE32-O0-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -1438,25 +1405,20 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
 ; WAVE32-O0-NEXT:    ; implicit-def: $sgpr18
 ; WAVE32-O0-NEXT:    v_mov_b32_e32 v30, s18
 ; WAVE32-O0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; WAVE32-O0-NEXT:    s_or_saveexec_b32 s24, -1
-; WAVE32-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
-; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s24
-; WAVE32-O0-NEXT:    s_waitcnt vmcnt(0)
-; WAVE32-O0-NEXT:    v_readlane_b32 s5, v0, 1
-; WAVE32-O0-NEXT:    v_readlane_b32 s4, v0, 0
+; WAVE32-O0-NEXT:    v_readlane_b32 s5, v33, 1
+; WAVE32-O0-NEXT:    v_readlane_b32 s4, v33, 0
 ; WAVE32-O0-NEXT:    ;;#ASMSTART
 ; WAVE32-O0-NEXT:    ; use s5
 ; WAVE32-O0-NEXT:    ;;#ASMEND
 ; WAVE32-O0-NEXT:    s_mov_b32 s32, s4
 ; WAVE32-O0-NEXT:    v_readlane_b32 s31, v32, 1
 ; WAVE32-O0-NEXT:    v_readlane_b32 s30, v32, 0
-; WAVE32-O0-NEXT:    ; kill: killed $vgpr0
 ; WAVE32-O0-NEXT:    s_xor_saveexec_b32 s4, -1
 ; WAVE32-O0-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
-; WAVE32-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; WAVE32-O0-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
 ; WAVE32-O0-NEXT:    s_mov_b32 exec_lo, s4
 ; WAVE32-O0-NEXT:    s_add_i32 s32, s32, 0xffffee00
-; WAVE32-O0-NEXT:    s_mov_b32 s33, s25
+; WAVE32-O0-NEXT:    s_mov_b32 s33, s24
 ; WAVE32-O0-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE32-O0-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1467,19 +1429,16 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
 ; WAVE64-O0-NEXT:    s_mov_b32 s33, s32
 ; WAVE64-O0-NEXT:    s_xor_saveexec_b64 s[16:17], -1
 ; WAVE64-O0-NEXT:    buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
-; WAVE64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; WAVE64-O0-NEXT:    buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
 ; WAVE64-O0-NEXT:    s_mov_b64 exec, s[16:17]
 ; WAVE64-O0-NEXT:    s_add_i32 s32, s32, 0x2400
-; WAVE64-O0-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; WAVE64-O0-NEXT:    v_writelane_b32 v32, s30, 0
 ; WAVE64-O0-NEXT:    v_writelane_b32 v32, s31, 1
 ; WAVE64-O0-NEXT:    s_mov_b32 s16, s32
-; WAVE64-O0-NEXT:    v_writelane_b32 v0, s16, 0
+; WAVE64-O0-NEXT:    ; implicit-def: $vgpr33 : SGPR spill to VGPR lane
+; WAVE64-O0-NEXT:    v_writelane_b32 v33, s16, 0
 ; WAVE64-O0-NEXT:    s_lshr_b32 s16, s16, 6
-; WAVE64-O0-NEXT:    v_writelane_b32 v0, s16, 1
-; WAVE64-O0-NEXT:    s_or_saveexec_b64 s[26:27], -1
-; WAVE64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
-; WAVE64-O0-NEXT:    s_mov_b64 exec, s[26:27]
+; WAVE64-O0-NEXT:    v_writelane_b32 v33, s16, 1
 ; WAVE64-O0-NEXT:    v_mov_b32_e32 v0, 42
 ; WAVE64-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s33
 ; WAVE64-O0-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -1557,22 +1516,17 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
 ; WAVE64-O0-NEXT:    ; implicit-def: $sgpr18
 ; WAVE64-O0-NEXT:    v_mov_b32_e32 v30, s18
 ; WAVE64-O0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; WAVE64-O0-NEXT:    s_or_saveexec_b64 s[26:27], -1
-; WAVE64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
-; WAVE64-O0-NEXT:    s_mov_b64 exec, s[26:27]
-; WAVE64-O0-NEXT:    s_waitcnt vmcnt(0)
-; WAVE64-O0-NEXT:    v_readlane_b32 s5, v0, 1
-; WAVE64-O0-NEXT:    v_readlane_b32 s4, v0, 0
+; WAVE64-O0-NEXT:    v_readlane_b32 s5, v33, 1
+; WAVE64-O0-NEXT:    v_readlane_b32 s4, v33, 0
 ; WAVE64-O0-NEXT:    ;;#ASMSTART
 ; WAVE64-O0-NEXT:    ; use s5
 ; WAVE64-O0-NEXT:    ;;#ASMEND
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s4
 ; WAVE64-O0-NEXT:    v_readlane_b32 s31, v32, 1
 ; WAVE64-O0-NEXT:    v_readlane_b32 s30, v32, 0
-; WAVE64-O0-NEXT:    ; kill: killed $vgpr0
 ; WAVE64-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
 ; WAVE64-O0-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
-; WAVE64-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; WAVE64-O0-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
 ; WAVE64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-O0-NEXT:    s_add_i32 s32, s32, 0xffffdc00
 ; WAVE64-O0-NEXT:    s_mov_b32 s33, s19
@@ -1585,14 +1539,14 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s24, s33
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s33, s32
 ; WAVE32-WWM-PREALLOC-NEXT:    s_xor_saveexec_b32 s16, -1
-; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v33, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
-; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v32, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 exec_lo, s16
 ; WAVE32-WWM-PREALLOC-NEXT:    s_add_i32 s32, s32, 0x1200
-; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
 ; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v33, s30, 0
 ; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v33, s31, 1
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s16, s32
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
 ; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v32, s16, 0
 ; WAVE32-WWM-PREALLOC-NEXT:    s_lshr_b32 s16, s16, 5
 ; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v32, s16, 1
@@ -1681,10 +1635,9 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s4
 ; WAVE32-WWM-PREALLOC-NEXT:    v_readlane_b32 s31, v33, 1
 ; WAVE32-WWM-PREALLOC-NEXT:    v_readlane_b32 s30, v33, 0
-; WAVE32-WWM-PREALLOC-NEXT:    ; kill: killed $vgpr32
 ; WAVE32-WWM-PREALLOC-NEXT:    s_xor_saveexec_b32 s4, -1
-; WAVE32-WWM-PREALLOC-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
-; WAVE32-WWM-PREALLOC-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 exec_lo, s4
 ; WAVE32-WWM-PREALLOC-NEXT:    s_add_i32 s32, s32, 0xffffee00
 ; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s33, s24
diff --git a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
index 2c4a5dba3520c..ce0a4bbbe5576 100644
--- a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
+++ b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
@@ -121,17 +121,17 @@ body:             |
   bb.0:
     liveins: $sgpr20, $vgpr1
     ; GCN-LABEL: name: wwm_csr_spill_reload
-    ; GCN: liveins: $sgpr20, $vgpr1, $vgpr40
+    ; GCN: liveins: $sgpr20, $vgpr1, $vgpr2
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
-    ; GCN-NEXT: $vgpr40 = IMPLICIT_DEF
-    ; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr40
-    ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
+    ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0, implicit $exec
     ; GCN-NEXT: $vgpr0 = COPY killed $vgpr1, implicit $exec
-    ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0
     $vgpr40 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
index 7dce633e9186a..ff019f7741353 100644
--- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
@@ -206,14 +206,14 @@ define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %a
 ;
 ; HSA-TRAP-GFX1100-O0-LABEL: non_entry_trap:
 ; HSA-TRAP-GFX1100-O0:       ; %bb.0: ; %entry
-; HSA-TRAP-GFX1100-O0-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt lgkmcnt(0)
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_mov_b64 s[2:3], s[0:1]
-; HSA-TRAP-GFX1100-O0-NEXT:    v_writelane_b32 v0, s2, 0
-; HSA-TRAP-GFX1100-O0-NEXT:    v_writelane_b32 v0, s3, 1
+; HSA-TRAP-GFX1100-O0-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
+; HSA-TRAP-GFX1100-O0-NEXT:    v_writelane_b32 v2, s2, 0
+; HSA-TRAP-GFX1100-O0-NEXT:    v_writelane_b32 v2, s3, 1
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_or_saveexec_b32 s6, -1
-; HSA-TRAP-GFX1100-O0-NEXT:    scratch_store_b32 off, v0, off ; 4-byte Folded Spill
+; HSA-TRAP-GFX1100-O0-NEXT:    scratch_store_b32 off, v2, off ; 4-byte Folded Spill
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_mov_b32 exec_lo, s6
 ; HSA-TRAP-GFX1100-O0-NEXT:    v_mov_b32_e32 v0, 0
 ; HSA-TRAP-GFX1100-O0-NEXT:    global_load_b32 v0, v0, s[0:1] glc dlc
@@ -236,16 +236,15 @@ define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %a
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_branch .LBB1_3
 ; HSA-TRAP-GFX1100-O0-NEXT:  .LBB1_2: ; %ret
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_or_saveexec_b32 s6, -1
-; HSA-TRAP-GFX1100-O0-NEXT:    scratch_load_b32 v0, off, off ; 4-byte Folded Reload
+; HSA-TRAP-GFX1100-O0-NEXT:    scratch_load_b32 v2, off, off ; 4-byte Folded Reload
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_mov_b32 exec_lo, s6
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX1100-O0-NEXT:    v_readlane_b32 s0, v0, 0
-; HSA-TRAP-GFX1100-O0-NEXT:    v_readlane_b32 s1, v0, 1
-; HSA-TRAP-GFX1100-O0-NEXT:    v_mov_b32_e32 v1, 0
-; HSA-TRAP-GFX1100-O0-NEXT:    v_mov_b32_e32 v2, 3
-; HSA-TRAP-GFX1100-O0-NEXT:    global_store_b32 v1, v2, s[0:1] dlc
+; HSA-TRAP-GFX1100-O0-NEXT:    v_readlane_b32 s0, v2, 0
+; HSA-TRAP-GFX1100-O0-NEXT:    v_readlane_b32 s1, v2, 1
+; HSA-TRAP-GFX1100-O0-NEXT:    v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX1100-O0-NEXT:    v_mov_b32_e32 v1, 3
+; HSA-TRAP-GFX1100-O0-NEXT:    global_store_b32 v0, v1, s[0:1] dlc
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt_vscnt null, 0x0
-; HSA-TRAP-GFX1100-O0-NEXT:    ; kill: killed $vgpr0
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_endpgm
 ; HSA-TRAP-GFX1100-O0-NEXT:  .LBB1_3: ; =>This Inner Loop Header: Depth=1
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_sethalt 5
@@ -352,34 +351,33 @@ define amdgpu_kernel void @trap_with_use_after(ptr addrspace(1) %arg0, ptr addrs
 ;
 ; HSA-TRAP-GFX1100-O0-LABEL: trap_with_use_after:
 ; HSA-TRAP-GFX1100-O0:       ; %bb.0:
-; HSA-TRAP-GFX1100-O0-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; HSA-TRAP-GFX1100-O0-NEXT:    v_mov_b32_e32 v0, 0
 ; HSA-TRAP-GFX1100-O0-NEXT:    scratch_store_b32 off, v0, off offset:8 ; 4-byte Folded Spill
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_load_b64 s[2:3], s[4:5], 0x8
+; HSA-TRAP-GFX1100-O0-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX1100-O0-NEXT:    v_writelane_b32 v1, s2, 0
-; HSA-TRAP-GFX1100-O0-NEXT:    v_writelane_b32 v1, s3, 1
+; HSA-TRAP-GFX1100-O0-NEXT:    v_writelane_b32 v2, s2, 0
+; HSA-TRAP-GFX1100-O0-NEXT:    v_writelane_b32 v2, s3, 1
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_or_saveexec_b32 s6, -1
-; HSA-TRAP-GFX1100-O0-NEXT:    scratch_store_b32 off, v1, off offset:4 ; 4-byte Folded Spill
+; HSA-TRAP-GFX1100-O0-NEXT:    scratch_store_b32 off, v2, off ; 4-byte Folded Spill
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_mov_b32 exec_lo, s6
 ; HSA-TRAP-GFX1100-O0-NEXT:    global_load_b32 v0, v0, s[0:1] glc dlc
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX1100-O0-NEXT:    scratch_store_b32 off, v0, off ; 4-byte Folded Spill
+; HSA-TRAP-GFX1100-O0-NEXT:    scratch_store_b32 off, v0, off offset:4 ; 4-byte Folded Spill
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_cbranch_execnz .LBB2_2
 ; HSA-TRAP-GFX1100-O0-NEXT:  ; %bb.1:
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_or_saveexec_b32 s6, -1
-; HSA-TRAP-GFX1100-O0-NEXT:    scratch_load_b32 v0, off, off offset:4 ; 4-byte Folded Reload
+; HSA-TRAP-GFX1100-O0-NEXT:    scratch_load_b32 v2, off, off ; 4-byte Folded Reload
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_mov_b32 exec_lo, s6
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX1100-O0-NEXT:    v_readlane_b32 s0, v0, 0
-; HSA-TRAP-GFX1100-O0-NEXT:    v_readlane_b32 s1, v0, 1
-; HSA-TRAP-GFX1100-O0-NEXT:    scratch_load_b32 v1, off, off offset:8 ; 4-byte Folded Reload
-; HSA-TRAP-GFX1100-O0-NEXT:    scratch_load_b32 v2, off, off ; 4-byte Folded Reload
+; HSA-TRAP-GFX1100-O0-NEXT:    v_readlane_b32 s0, v2, 0
+; HSA-TRAP-GFX1100-O0-NEXT:    v_readlane_b32 s1, v2, 1
+; HSA-TRAP-GFX1100-O0-NEXT:    scratch_load_b32 v0, off, off offset:8 ; 4-byte Folded Reload
+; HSA-TRAP-GFX1100-O0-NEXT:    scratch_load_b32 v1, off, off offset:4 ; 4-byte Folded Reload
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX1100-O0-NEXT:    global_store_b32 v1, v2, s[0:1] dlc
+; HSA-TRAP-GFX1100-O0-NEXT:    global_store_b32 v0, v1, s[0:1] dlc
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_waitcnt_vscnt null, 0x0
-; HSA-TRAP-GFX1100-O0-NEXT:    ; kill: killed $vgpr0
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_endpgm
 ; HSA-TRAP-GFX1100-O0-NEXT:  .LBB2_2:
 ; HSA-TRAP-GFX1100-O0-NEXT:    s_trap 2
diff --git a/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir b/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir
index c73b023f18652..4c2d0d2fa0d77 100644
--- a/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir
+++ b/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
 
 --- |
   define amdgpu_ps void @e32() #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
index 20dc5ad5c8665..e1fbd351997d1 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
@@ -13,26 +13,23 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9()  {
 ; CHECK:       ; %bb.0: ; %bb
 ; CHECK-NEXT:    s_add_u32 s0, s0, s13
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; CHECK-NEXT:    v_mov_b32_e32 v2, v0
-; CHECK-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload
-; CHECK-NEXT:    s_mov_b64 exec, s[8:9]
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; CHECK-NEXT:    global_load_ushort v3, v1, s[4:5] offset:4
+; CHECK-NEXT:    v_mov_b32_e32 v1, v0
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    global_load_ushort v2, v0, s[4:5] offset:4
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
 ; CHECK-NEXT:    ; implicit-def: $sgpr4
 ; CHECK-NEXT:    s_mov_b32 s4, 0
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[6:7], v2, s4
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[6:7], v1, s4
 ; CHECK-NEXT:    s_mov_b32 s4, 0
-; CHECK-NEXT:    v_mov_b32_e32 v2, s4
-; CHECK-NEXT:    ds_write_b8 v1, v2
+; CHECK-NEXT:    v_mov_b32_e32 v1, s4
+; CHECK-NEXT:    ds_write_b8 v0, v1
 ; CHECK-NEXT:    s_mov_b64 s[4:5], exec
-; CHECK-NEXT:    v_writelane_b32 v0, s4, 0
-; CHECK-NEXT:    v_writelane_b32 v0, s5, 1
+; CHECK-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
+; CHECK-NEXT:    v_writelane_b32 v3, s4, 0
+; CHECK-NEXT:    v_writelane_b32 v3, s5, 1
 ; CHECK-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], 0 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], 0 ; 4-byte Folded Spill
 ; CHECK-NEXT:    s_mov_b64 exec, s[8:9]
 ; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; CHECK-NEXT:    s_mov_b64 exec, s[4:5]
@@ -40,11 +37,11 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9()  {
 ; CHECK-NEXT:  ; %bb.1: ; %bb193
 ; CHECK-NEXT:  .LBB0_2: ; %bb194
 ; CHECK-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], 0 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v3, off, s[0:3], 0 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b64 exec, s[8:9]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    v_readlane_b32 s4, v1, 0
-; CHECK-NEXT:    v_readlane_b32 s5, v1, 1
+; CHECK-NEXT:    v_readlane_b32 s4, v3, 0
+; CHECK-NEXT:    v_readlane_b32 s5, v3, 1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
 ; CHECK-NEXT:    s_mov_b32 s4, 0
@@ -65,10 +62,6 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9()  {
 ; CHECK-NEXT:    s_trap 2
 ; CHECK-NEXT:    ; divergent unreachable
 ; CHECK-NEXT:  .LBB0_4: ; %UnifiedReturnBlock
-; CHECK-NEXT:    s_or_saveexec_b64 s[8:9], -1
-; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload
-; CHECK-NEXT:    s_mov_b64 exec, s[8:9]
-; CHECK-NEXT:    ; kill: killed $vgpr0
 ; CHECK-NEXT:    s_endpgm
 bb:
   %i10 = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
index 6659e95323769..fa0922590712a 100644
--- a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
@@ -30,7 +30,7 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: undef_identity_copy
-    ; CHECK: renamable $vgpr40_vgpr41_vgpr42_vgpr43 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1)
+    ; CHECK: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1)
     ; CHECK-NEXT: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
     ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95, implicit-def $scc
     ; CHECK-NEXT: $sgpr4 = COPY $sgpr95
@@ -39,13 +39,14 @@ body:             |
     ; CHECK-NEXT: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc
     ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
     ; CHECK-NEXT: $sgpr4 = COPY $sgpr95
-    ; CHECK-NEXT: $vgpr0 = COPY renamable $vgpr40
-    ; CHECK-NEXT: $vgpr1 = COPY renamable $vgpr41
-    ; CHECK-NEXT: $vgpr2 = COPY killed renamable $vgpr42
-    ; CHECK-NEXT: $vgpr3 = KILL undef renamable $vgpr3
+    ; CHECK-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORDX4_]].sub0
+    ; CHECK-NEXT: $vgpr1 = COPY [[FLAT_LOAD_DWORDX4_]].sub1
+    ; CHECK-NEXT: $vgpr2 = COPY [[FLAT_LOAD_DWORDX4_]].sub2
+    ; CHECK-NEXT: $vgpr3 = COPY undef %4:vgpr_32
     ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
-    ; CHECK-NEXT: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; CHECK-NEXT: FLAT_STORE_DWORD undef %6:vreg_64, [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
     ; CHECK-NEXT: S_ENDPGM 0
     %0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1)
     %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
index 8c285f37b4878..d1ee82e74b3de 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
@@ -13,24 +13,24 @@ define void @vector_reg_liverange_split() #0 {
 ; GFX90A-NEXT:    s_mov_b32 s16, s33
 ; GFX90A-NEXT:    s_mov_b32 s33, s32
 ; GFX90A-NEXT:    s_xor_saveexec_b64 s[18:19], -1
-; GFX90A-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX90A-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GFX90A-NEXT:    s_mov_b64 exec, -1
-; GFX90A-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX90A-NEXT:    buffer_store_dword a32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX90A-NEXT:    buffer_store_dword a32, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX90A-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX90A-NEXT:    s_mov_b64 exec, s[18:19]
 ; GFX90A-NEXT:    v_writelane_b32 v40, s16, 4
 ; GFX90A-NEXT:    v_writelane_b32 v40, s28, 2
 ; GFX90A-NEXT:    v_writelane_b32 v40, s29, 3
-; GFX90A-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GFX90A-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX90A-NEXT:    ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
 ; GFX90A-NEXT:    s_addk_i32 s32, 0x400
 ; GFX90A-NEXT:    v_writelane_b32 v40, s31, 1
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def s20
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_writelane_b32 v0, s20, 0
+; GFX90A-NEXT:    v_writelane_b32 v39, s20, 0
 ; GFX90A-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GFX90A-NEXT:    v_accvgpr_write_b32 a32, v0
+; GFX90A-NEXT:    v_accvgpr_write_b32 a32, v39
 ; GFX90A-NEXT:    s_mov_b64 exec, s[28:29]
 ; GFX90A-NEXT:    s_getpc_b64 s[16:17]
 ; GFX90A-NEXT:    s_add_u32 s16, s16, foo@gotpcrel32@lo+4
@@ -39,23 +39,22 @@ define void @vector_reg_liverange_split() #0 {
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX90A-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GFX90A-NEXT:    v_accvgpr_read_b32 v0, a32
+; GFX90A-NEXT:    v_accvgpr_read_b32 v39, a32
 ; GFX90A-NEXT:    s_mov_b64 exec, s[28:29]
-; GFX90A-NEXT:    v_readlane_b32 s20, v0, 0
+; GFX90A-NEXT:    v_readlane_b32 s20, v39, 0
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; use s20
 ; GFX90A-NEXT:    ;;#ASMEND
 ; GFX90A-NEXT:    v_readlane_b32 s31, v40, 1
 ; GFX90A-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX90A-NEXT:    ; kill: killed $vgpr0
 ; GFX90A-NEXT:    v_readlane_b32 s4, v40, 4
 ; GFX90A-NEXT:    v_readlane_b32 s28, v40, 2
 ; GFX90A-NEXT:    v_readlane_b32 s29, v40, 3
 ; GFX90A-NEXT:    s_xor_saveexec_b64 s[6:7], -1
-; GFX90A-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GFX90A-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; GFX90A-NEXT:    s_mov_b64 exec, -1
-; GFX90A-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX90A-NEXT:    buffer_load_dword a32, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX90A-NEXT:    buffer_load_dword a32, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX90A-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; GFX90A-NEXT:    s_mov_b64 exec, s[6:7]
 ; GFX90A-NEXT:    s_addk_i32 s32, 0xfc00
 ; GFX90A-NEXT:    s_mov_b32 s33, s4
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
index 5608ea8563548..4837efe6606b8 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
@@ -19,24 +19,23 @@ define void @test() #0 {
 ; GCN-NEXT:    s_mov_b32 s16, s33
 ; GCN-NEXT:    s_mov_b32 s33, s32
 ; GCN-NEXT:    s_xor_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[18:19]
 ; GCN-NEXT:    v_writelane_b32 v40, s16, 4
 ; GCN-NEXT:    v_writelane_b32 v40, s28, 2
 ; GCN-NEXT:    v_writelane_b32 v40, s29, 3
-; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    s_addk_i32 s32, 0x800
+; GCN-NEXT:    ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
+; GCN-NEXT:    s_addk_i32 s32, 0x400
 ; GCN-NEXT:    v_writelane_b32 v40, s31, 1
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s16
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v0, s16, 0
+; GCN-NEXT:    v_writelane_b32 v39, s16, 0
 ; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[28:29]
 ; GCN-NEXT:    s_getpc_b64 s[16:17]
 ; GCN-NEXT:    s_add_u32 s16, s16, ext_func@gotpcrel32@lo+4
@@ -45,26 +44,24 @@ define void @test() #0 {
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v39, off, s[0:3], s33 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[28:29]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_readlane_b32 s4, v1, 0
+; GCN-NEXT:    v_readlane_b32 s4, v39, 0
 ; GCN-NEXT:    v_mov_b32_e32 v0, s4
 ; GCN-NEXT:    global_store_dword v[0:1], v0, off
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readlane_b32 s31, v40, 1
 ; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    ; kill: killed $vgpr1
 ; GCN-NEXT:    v_readlane_b32 s4, v40, 4
 ; GCN-NEXT:    v_readlane_b32 s28, v40, 2
 ; GCN-NEXT:    v_readlane_b32 s29, v40, 3
 ; GCN-NEXT:    s_xor_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xf800
+; GCN-NEXT:    s_addk_i32 s32, 0xfc00
 ; GCN-NEXT:    s_mov_b32 s33, s4
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -75,23 +72,23 @@ define void @test() #0 {
 ; GCN-O0-NEXT:    s_mov_b32 s16, s33
 ; GCN-O0-NEXT:    s_mov_b32 s33, s32
 ; GCN-O0-NEXT:    s_xor_saveexec_b64 s[18:19], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, -1
-; GCN-O0-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[18:19]
 ; GCN-O0-NEXT:    v_writelane_b32 v40, s16, 4
 ; GCN-O0-NEXT:    v_writelane_b32 v40, s28, 2
 ; GCN-O0-NEXT:    v_writelane_b32 v40, s29, 3
 ; GCN-O0-NEXT:    s_add_i32 s32, s32, 0x400
-; GCN-O0-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT:    v_writelane_b32 v40, s30, 0
 ; GCN-O0-NEXT:    v_writelane_b32 v40, s31, 1
 ; GCN-O0-NEXT:    ;;#ASMSTART
 ; GCN-O0-NEXT:    ; def s16
 ; GCN-O0-NEXT:    ;;#ASMEND
-; GCN-O0-NEXT:    v_writelane_b32 v0, s16, 0
+; GCN-O0-NEXT:    ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
+; GCN-O0-NEXT:    v_writelane_b32 v39, s16, 0
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GCN-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT:    buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[28:29]
 ; GCN-O0-NEXT:    s_getpc_b64 s[16:17]
 ; GCN-O0-NEXT:    s_add_u32 s16, s16, ext_func@gotpcrel32@lo+4
@@ -104,26 +101,25 @@ define void @test() #0 {
 ; GCN-O0-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-O0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GCN-O0-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v39, off, s[0:3], s33 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[28:29]
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
-; GCN-O0-NEXT:    v_readlane_b32 s4, v0, 0
+; GCN-O0-NEXT:    v_readlane_b32 s4, v39, 0
 ; GCN-O0-NEXT:    ; implicit-def: $sgpr6_sgpr7
-; GCN-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GCN-O0-NEXT:    v_mov_b32_e32 v2, s7
-; GCN-O0-NEXT:    v_mov_b32_e32 v3, s4
-; GCN-O0-NEXT:    global_store_dword v[1:2], v3, off
+; GCN-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GCN-O0-NEXT:    v_mov_b32_e32 v1, s7
+; GCN-O0-NEXT:    v_mov_b32_e32 v2, s4
+; GCN-O0-NEXT:    global_store_dword v[0:1], v2, off
 ; GCN-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-O0-NEXT:    v_readlane_b32 s31, v40, 1
 ; GCN-O0-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-O0-NEXT:    ; kill: killed $vgpr0
 ; GCN-O0-NEXT:    v_readlane_b32 s4, v40, 4
 ; GCN-O0-NEXT:    v_readlane_b32 s28, v40, 2
 ; GCN-O0-NEXT:    v_readlane_b32 s29, v40, 3
 ; GCN-O0-NEXT:    s_xor_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, -1
-; GCN-O0-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-O0-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; GCN-O0-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-O0-NEXT:    s_add_i32 s32, s32, 0xfffffc00
 ; GCN-O0-NEXT:    s_mov_b32 s33, s4
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index e79cb66dcd776..6279a3627fe72 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -141,15 +141,10 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
 ; GFX9-O0:       ; %bb.0: ; %entry
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[46:47]
 ; GFX9-O0-NEXT:    s_mov_b32 s40, s6
 ; GFX9-O0-NEXT:    s_mov_b32 s34, s4
 ; GFX9-O0-NEXT:    ; kill: def $sgpr40 killed $sgpr40 def $sgpr40_sgpr41
@@ -164,20 +159,20 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
 ; GFX9-O0-NEXT:    s_mov_b32 s37, s44
 ; GFX9-O0-NEXT:    s_mov_b32 s38, s43
 ; GFX9-O0-NEXT:    s_mov_b32 s39, s42
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s40, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s41, 1
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s34, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s35, 3
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s40, 0
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s41, 1
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s34, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s35, 3
 ; GFX9-O0-NEXT:    s_mov_b32 s34, 0
 ; GFX9-O0-NEXT:    s_nop 2
-; GFX9-O0-NEXT:    buffer_load_dwordx2 v[4:5], off, s[36:39], s34
+; GFX9-O0-NEXT:    buffer_load_dwordx2 v[3:4], off, s[36:39], s34
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr36_sgpr37
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s34
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
@@ -187,16 +182,16 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
 ; GFX9-O0-NEXT:    v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf
 ; GFX9-O0-NEXT:    v_add_u32_e64 v1, v1, v2
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[36:37], v3, s34
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s34
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[36:37], v0, s34
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s34
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[34:35], exec
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s34, 4
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s35, 5
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s34, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s35, 5
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[46:47]
 ; GFX9-O0-NEXT:    s_and_b64 s[34:35], s[34:35], s[36:37]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
@@ -221,25 +216,25 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
 ; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:  .LBB1_2: ; %merge
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[46:47]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s36, v0, 4
-; GFX9-O0-NEXT:    v_readlane_b32 s37, v0, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s36, v5, 4
+; GFX9-O0-NEXT:    v_readlane_b32 s37, v5, 5
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[36:37]
-; GFX9-O0-NEXT:    v_readlane_b32 s38, v0, 0
-; GFX9-O0-NEXT:    v_readlane_b32 s39, v0, 1
-; GFX9-O0-NEXT:    v_readlane_b32 s34, v0, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s35, v0, 3
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    v_readlane_b32 s38, v5, 0
+; GFX9-O0-NEXT:    v_readlane_b32 s39, v5, 1
+; GFX9-O0-NEXT:    v_readlane_b32 s34, v5, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s35, v5, 3
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[36:37], v3, v4
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s[36:37]
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[36:37], v0, v3
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[36:37]
 ; GFX9-O0-NEXT:    s_mov_b32 s36, 1
-; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v3, s36, v3
+; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v0, s36, v0
 ; GFX9-O0-NEXT:    s_mov_b32 s36, 2
-; GFX9-O0-NEXT:    v_and_b32_e64 v3, v3, s36
+; GFX9-O0-NEXT:    v_and_b32_e64 v0, v0, s36
 ; GFX9-O0-NEXT:    s_mov_b32 s40, s35
 ; GFX9-O0-NEXT:    s_mov_b32 s36, s34
 ; GFX9-O0-NEXT:    s_mov_b32 s34, s39
@@ -249,12 +244,11 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
 ; GFX9-O0-NEXT:    s_mov_b32 s38, s35
 ; GFX9-O0-NEXT:    s_mov_b32 s39, s34
 ; GFX9-O0-NEXT:    s_mov_b32 s34, 0
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[36:39], s34 offset:4
-; GFX9-O0-NEXT:    ; kill: killed $vgpr0
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[36:39], s34 offset:4
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -371,9 +365,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
 ; GFX9-O0-NEXT:    s_mov_b32 s48, s33
 ; GFX9-O0-NEXT:    s_mov_b32 s33, s32
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O0-NEXT:    s_add_i32 s32, s32, 0x400
 ; GFX9-O0-NEXT:    v_writelane_b32 v3, s30, 0
@@ -415,9 +409,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
 ; GFX9-O0-NEXT:    v_readlane_b32 s31, v3, 1
 ; GFX9-O0-NEXT:    v_readlane_b32 s30, v3, 0
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O0-NEXT:    s_add_i32 s32, s32, 0xfffffc00
 ; GFX9-O0-NEXT:    s_mov_b32 s33, s48
@@ -430,9 +424,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
 ; GFX9-O3-NEXT:    s_mov_b32 s38, s33
 ; GFX9-O3-NEXT:    s_mov_b32 s33, s32
 ; GFX9-O3-NEXT:    s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O3-NEXT:    buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-O3-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GFX9-O3-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT:    v_writelane_b32 v3, s30, 0
 ; GFX9-O3-NEXT:    s_addk_i32 s32, 0x400
@@ -455,9 +449,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
 ; GFX9-O3-NEXT:    v_readlane_b32 s31, v3, 1
 ; GFX9-O3-NEXT:    v_readlane_b32 s30, v3, 0
 ; GFX9-O3-NEXT:    s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O3-NEXT:    buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT:    s_addk_i32 s32, 0xfc00
 ; GFX9-O3-NEXT:    s_mov_b32 s33, s38
@@ -559,28 +553,26 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
 ; GFX9-O0-LABEL: strict_wwm_call_i64:
 ; GFX9-O0:       ; %bb.0:
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-O0-NEXT:    s_mov_b32 s48, s33
+; GFX9-O0-NEXT:    s_mov_b32 s46, s33
 ; GFX9-O0-NEXT:    s_mov_b32 s33, s32
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX9-O0-NEXT:    s_add_i32 s32, s32, 0x1000
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    s_add_i32 s32, s32, 0xc00
 ; GFX9-O0-NEXT:    v_writelane_b32 v10, s30, 0
 ; GFX9-O0-NEXT:    v_writelane_b32 v10, s31, 1
 ; GFX9-O0-NEXT:    s_mov_b32 s34, s8
@@ -598,10 +590,11 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
 ; GFX9-O0-NEXT:    s_mov_b32 s41, s45
 ; GFX9-O0-NEXT:    s_mov_b32 s42, s44
 ; GFX9-O0-NEXT:    s_mov_b32 s43, s35
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s40, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s41, 1
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s42, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s43, 3
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr11 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v11, s40, 0
+; GFX9-O0-NEXT:    v_writelane_b32 v11, s41, 1
+; GFX9-O0-NEXT:    v_writelane_b32 v11, s42, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v11, s43, 3
 ; GFX9-O0-NEXT:    ; kill: def $sgpr34 killed $sgpr34 def $sgpr34_sgpr35
 ; GFX9-O0-NEXT:    s_mov_b32 s35, s9
 ; GFX9-O0-NEXT:    ; kill: def $sgpr36_sgpr37 killed $sgpr34_sgpr35
@@ -613,11 +606,8 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s37
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s34, 4
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s35, 5
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[46:47]
+; GFX9-O0-NEXT:    v_writelane_b32 v11, s34, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v11, s35, 5
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
 ; GFX9-O0-NEXT:    s_mov_b32 s34, 32
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr36_sgpr37
@@ -634,20 +624,13 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-O0-NEXT:    s_swappc_b64 s[30:31], s[34:35]
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[46:47]
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s34, v6, 4
-; GFX9-O0-NEXT:    v_readlane_b32 s35, v6, 5
-; GFX9-O0-NEXT:    v_readlane_b32 s36, v6, 0
-; GFX9-O0-NEXT:    v_readlane_b32 s37, v6, 1
-; GFX9-O0-NEXT:    v_readlane_b32 s38, v6, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s39, v6, 3
+; GFX9-O0-NEXT:    v_readlane_b32 s34, v11, 4
+; GFX9-O0-NEXT:    v_readlane_b32 s35, v11, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s36, v11, 0
+; GFX9-O0-NEXT:    v_readlane_b32 s37, v11, 1
+; GFX9-O0-NEXT:    v_readlane_b32 s38, v11, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s39, v11, 3
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[46:47]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr40
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr40
@@ -656,30 +639,28 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
 ; GFX9-O0-NEXT:    v_add_co_u32_e64 v2, s[40:41], v2, v4
 ; GFX9-O0-NEXT:    v_addc_co_u32_e64 v3, s[40:41], v3, v5, s[40:41]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT:    s_mov_b32 s34, 0
-; GFX9-O0-NEXT:    buffer_store_dwordx2 v[6:7], off, s[36:39], s34 offset:4
+; GFX9-O0-NEXT:    buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4
 ; GFX9-O0-NEXT:    v_readlane_b32 s31, v10, 1
 ; GFX9-O0-NEXT:    v_readlane_b32 s30, v10, 0
-; GFX9-O0-NEXT:    ; kill: killed $vgpr0
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX9-O0-NEXT:    s_add_i32 s32, s32, 0xfffff000
-; GFX9-O0-NEXT:    s_mov_b32 s33, s48
+; GFX9-O0-NEXT:    s_add_i32 s32, s32, 0xfffff400
+; GFX9-O0-NEXT:    s_mov_b32 s33, s46
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -689,15 +670,15 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
 ; GFX9-O3-NEXT:    s_mov_b32 s40, s33
 ; GFX9-O3-NEXT:    s_mov_b32 s33, s32
 ; GFX9-O3-NEXT:    s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O3-NEXT:    buffer_store_dword v8, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-O3-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v6, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT:    buffer_store_dword v7, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v7, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; GFX9-O3-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
-; GFX9-O3-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v8, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT:    v_writelane_b32 v8, s30, 0
 ; GFX9-O3-NEXT:    s_addk_i32 s32, 0x800
@@ -730,13 +711,13 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
 ; GFX9-O3-NEXT:    v_readlane_b32 s31, v8, 1
 ; GFX9-O3-NEXT:    v_readlane_b32 s30, v8, 0
 ; GFX9-O3-NEXT:    s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O3-NEXT:    buffer_load_dword v8, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT:    s_addk_i32 s32, 0xf800
 ; GFX9-O3-NEXT:    s_mov_b32 s33, s40
@@ -841,12 +822,12 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in
 ; GFX9-O3-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O3-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX9-O3-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX9-O3-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
 ; GFX9-O3-NEXT:    buffer_load_dwordx4 v[7:10], v0, s[4:7], 0 offen
@@ -884,10 +865,10 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in
 ; GFX9-O3-NEXT:    s_xor_saveexec_b64 s[34:35], -1
 ; GFX9-O3-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O3-NEXT:    s_setpc_b64 s[30:31]
@@ -925,20 +906,20 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
 ; GFX9-O0-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, -1
-; GFX9-O0-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O0-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
@@ -951,35 +932,35 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
 ; GFX9-O0-NEXT:    v_writelane_b32 v47, s65, 1
 ; GFX9-O0-NEXT:    v_writelane_b32 v47, s66, 2
 ; GFX9-O0-NEXT:    v_writelane_b32 v47, s67, 3
-; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:16
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr34
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr34
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr34
@@ -989,145 +970,145 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v42, s5
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s7
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s7
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s8
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s9
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s10
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s11
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s13
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s12
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s14
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s13
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s15
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s14
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s15
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s16
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s18
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s17
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s19
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s18
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s20
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s19
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s21
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s20
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s22
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s21
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s23
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s22
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s24
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s23
 ; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s24
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v46, s25
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v45, s26
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v44, s27
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v43, s28
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s29
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v46
-; GFX9-O0-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v45
-; GFX9-O0-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v44
-; GFX9-O0-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v43
-; GFX9-O0-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(4)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v25, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v26, v42
-; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v27, v46
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v28, v45
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v29, v44
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v30, v43
 ; GFX9-O0-NEXT:    ; kill: def $vgpr31 killed $vgpr42 killed $exec
-; GFX9-O0-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v29, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v29, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr34
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr34
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
@@ -1247,16 +1228,16 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
 ; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[34:35], -1
 ; GFX9-O0-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, -1
-; GFX9-O0-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
@@ -1268,18 +1249,18 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
 ; GFX9-O3-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O3-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; GFX9-O3-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; GFX9-O3-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; GFX9-O3-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; GFX9-O3-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-O3-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
@@ -1361,14 +1342,14 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
 ; GFX9-O3-NEXT:    s_xor_saveexec_b64 s[34:35], -1
 ; GFX9-O3-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-O3-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O3-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT:    s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O3-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
index def51f2b16d3e..1124d34c5c6c4 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
@@ -121,15 +121,10 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
 ; GFX9-O0-NEXT:    s_mov_b32 s19, 0xe00000
 ; GFX9-O0-NEXT:    s_add_u32 s16, s16, s4
 ; GFX9-O0-NEXT:    s_addc_u32 s17, s17, 0
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[12:13]
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s3, 0
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s3, 0
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s1
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v0, 0
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v5, 0
 ; GFX9-O0-NEXT:    ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
 ; GFX9-O0-NEXT:    s_mov_b32 s3, s1
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s3
@@ -142,19 +137,19 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s10
 ; GFX9-O0-NEXT:    s_mov_b32 s6, s9
 ; GFX9-O0-NEXT:    s_mov_b32 s7, s8
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s2, 1
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s3, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s0, 3
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s1, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s2, 1
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s3, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s0, 3
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s1, 4
 ; GFX9-O0-NEXT:    s_mov_b32 s0, 0
 ; GFX9-O0-NEXT:    s_nop 2
-; GFX9-O0-NEXT:    buffer_load_dwordx2 v[4:5], off, s[4:7], s0
+; GFX9-O0-NEXT:    buffer_load_dwordx2 v[3:4], off, s[4:7], s0
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr2_sgpr3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
@@ -164,16 +159,16 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
 ; GFX9-O0-NEXT:    v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf
 ; GFX9-O0-NEXT:    v_add_u32_e64 v1, v1, v2
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[2:3], v3, s0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s0
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[2:3], v0, s0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[0:1], exec
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s0, 5
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s1, 6
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s0, 5
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s1, 6
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[16:19], 0 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[12:13]
 ; GFX9-O0-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[0:1]
@@ -198,25 +193,25 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
 ; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:  .LBB1_2: ; %merge
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[16:19], 0 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[12:13]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v0, 5
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v0, 6
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v5, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v5, 6
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX9-O0-NEXT:    v_readlane_b32 s2, v0, 1
-; GFX9-O0-NEXT:    v_readlane_b32 s3, v0, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s0, v0, 3
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v0, 4
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    v_readlane_b32 s2, v5, 1
+; GFX9-O0-NEXT:    v_readlane_b32 s3, v5, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s0, v5, 3
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v5, 4
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v4
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s[4:5]
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v3
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 1
-; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v3, s4, v3
+; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v0, s4, v0
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 2
-; GFX9-O0-NEXT:    v_and_b32_e64 v3, v3, s4
+; GFX9-O0-NEXT:    v_and_b32_e64 v0, v0, s4
 ; GFX9-O0-NEXT:    s_mov_b32 s6, s1
 ; GFX9-O0-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s3
@@ -226,8 +221,7 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
 ; GFX9-O0-NEXT:    s_mov_b32 s2, s5
 ; GFX9-O0-NEXT:    s_mov_b32 s3, s4
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 0
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT:    ; kill: killed $vgpr0
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s4 offset:4
 ; GFX9-O0-NEXT:    s_endpgm
 ;
 ; GFX9-O3-LABEL: cfg:
@@ -328,38 +322,32 @@ define hidden i32 @called(i32 %a) noinline {
 define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
 ; GFX9-O0-LABEL: call:
 ; GFX9-O0:       ; %bb.0:
-; GFX9-O0-NEXT:    s_mov_b32 s32, 0x400
+; GFX9-O0-NEXT:    s_mov_b32 s32, 0
 ; GFX9-O0-NEXT:    s_mov_b32 s24, SCRATCH_RSRC_DWORD0
 ; GFX9-O0-NEXT:    s_mov_b32 s25, SCRATCH_RSRC_DWORD1
 ; GFX9-O0-NEXT:    s_mov_b32 s26, -1
 ; GFX9-O0-NEXT:    s_mov_b32 s27, 0xe00000
 ; GFX9-O0-NEXT:    s_add_u32 s24, s24, s9
 ; GFX9-O0-NEXT:    s_addc_u32 s25, s25, 0
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[10:11], -1
-; GFX9-O0-NEXT:    v_writelane_b32 v7, s10, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v7, s11, 1
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s10, 0
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s11, 1
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s13, s7
 ; GFX9-O0-NEXT:    s_mov_b32 s12, s6
 ; GFX9-O0-NEXT:    s_mov_b64 s[10:11], s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; GFX9-O0-NEXT:    v_readlane_b32 s2, v7, 0
-; GFX9-O0-NEXT:    v_readlane_b32 s3, v7, 1
-; GFX9-O0-NEXT:    v_writelane_b32 v7, s4, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v7, s5, 3
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT:    v_readlane_b32 s2, v3, 0
+; GFX9-O0-NEXT:    v_readlane_b32 s3, v3, 1
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s4, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s5, 3
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; GFX9-O0-NEXT:    v_readlane_b32 s0, v7, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v7, 3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT:    v_readlane_b32 s0, v3, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v3, 3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v0
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[2:3]
 ; GFX9-O0-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x24
 ; GFX9-O0-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x2c
@@ -373,23 +361,19 @@ define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
 ; GFX9-O0-NEXT:    s_mov_b32 s17, s7
 ; GFX9-O0-NEXT:    s_mov_b32 s18, s6
 ; GFX9-O0-NEXT:    s_mov_b32 s19, s3
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s16, 4
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s17, 5
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s18, 6
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s19, 7
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s16, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s17, 5
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s18, 6
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s19, 7
 ; GFX9-O0-NEXT:    s_mov_b32 s3, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s3, 8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s2
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s3, 8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s2
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[2:3], -1
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s2, 9
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s3, 10
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s2, 9
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s3, 10
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 56
 ; GFX9-O0-NEXT:    s_mov_b32 s2, s0
 ; GFX9-O0-NEXT:    s_mov_b32 s0, s1
@@ -405,35 +389,28 @@ define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
 ; GFX9-O0-NEXT:    s_mov_b64 s[0:1], s[24:25]
 ; GFX9-O0-NEXT:    s_mov_b64 s[2:3], s[26:27]
 ; GFX9-O0-NEXT:    s_mov_b32 s6, 20
-; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v3, s6, v3
-; GFX9-O0-NEXT:    s_mov_b32 s6, 10
 ; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v4, s6, v4
-; GFX9-O0-NEXT:    v_or3_b32 v3, v5, v4, v3
+; GFX9-O0-NEXT:    s_mov_b32 s6, 10
+; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v5, s6, v5
+; GFX9-O0-NEXT:    v_or3_b32 v4, v6, v5, v4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr15
-; GFX9-O0-NEXT:    v_mov_b32_e32 v31, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v31, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v7
 ; GFX9-O0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s0, v1, 4
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v1, 5
-; GFX9-O0-NEXT:    v_readlane_b32 s2, v1, 6
-; GFX9-O0-NEXT:    v_readlane_b32 s3, v1, 7
-; GFX9-O0-NEXT:    v_readlane_b32 s6, v1, 9
-; GFX9-O0-NEXT:    v_readlane_b32 s7, v1, 10
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v1, 8
+; GFX9-O0-NEXT:    v_readlane_b32 s0, v3, 4
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v3, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s2, v3, 6
+; GFX9-O0-NEXT:    v_readlane_b32 s3, v3, 7
+; GFX9-O0-NEXT:    v_readlane_b32 s6, v3, 9
+; GFX9-O0-NEXT:    v_readlane_b32 s7, v3, 10
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v3, 8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT:    v_add_u32_e64 v3, v3, v6
+; GFX9-O0-NEXT:    v_add_u32_e64 v3, v3, v7
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT:    ; kill: killed $vgpr0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v3
+; GFX9-O0-NEXT:    s_nop 0
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s4 offset:4
 ; GFX9-O0-NEXT:    s_endpgm
 ;
 ; GFX9-O3-LABEL: call:
@@ -563,38 +540,32 @@ define i64 @called_i64(i64 %a) noinline {
 define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %arg) {
 ; GFX9-O0-LABEL: call_i64:
 ; GFX9-O0:       ; %bb.0:
-; GFX9-O0-NEXT:    s_mov_b32 s32, 0x400
+; GFX9-O0-NEXT:    s_mov_b32 s32, 0
 ; GFX9-O0-NEXT:    s_mov_b32 s24, SCRATCH_RSRC_DWORD0
 ; GFX9-O0-NEXT:    s_mov_b32 s25, SCRATCH_RSRC_DWORD1
 ; GFX9-O0-NEXT:    s_mov_b32 s26, -1
 ; GFX9-O0-NEXT:    s_mov_b32 s27, 0xe00000
 ; GFX9-O0-NEXT:    s_add_u32 s24, s24, s9
 ; GFX9-O0-NEXT:    s_addc_u32 s25, s25, 0
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr12 : SGPR spill to VGPR lane
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[10:11], -1
-; GFX9-O0-NEXT:    v_writelane_b32 v12, s10, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v12, s11, 1
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s10, 0
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s11, 1
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s13, s7
 ; GFX9-O0-NEXT:    s_mov_b32 s12, s6
 ; GFX9-O0-NEXT:    s_mov_b64 s[10:11], s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; GFX9-O0-NEXT:    v_readlane_b32 s2, v12, 0
-; GFX9-O0-NEXT:    v_readlane_b32 s3, v12, 1
-; GFX9-O0-NEXT:    v_writelane_b32 v12, s4, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v12, s5, 3
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT:    v_readlane_b32 s2, v8, 0
+; GFX9-O0-NEXT:    v_readlane_b32 s3, v8, 1
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s4, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s5, 3
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; GFX9-O0-NEXT:    v_readlane_b32 s0, v12, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v12, 3
+; GFX9-O0-NEXT:    v_readlane_b32 s0, v8, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v8, 3
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[2:3]
 ; GFX9-O0-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x24
 ; GFX9-O0-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x2c
@@ -608,24 +579,20 @@ define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %ar
 ; GFX9-O0-NEXT:    s_mov_b32 s17, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s18, s7
 ; GFX9-O0-NEXT:    s_mov_b32 s19, s6
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s16, 4
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s17, 5
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s18, 6
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s19, 7
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s16, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s17, 5
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s18, 6
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s19, 7
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s3
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s7
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[2:3], -1
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s2, 8
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s3, 9
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s2, 8
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s3, 9
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 60
 ; GFX9-O0-NEXT:    s_mov_b32 s2, s0
 ; GFX9-O0-NEXT:    s_mov_b32 s0, s1
@@ -635,11 +602,11 @@ define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %ar
 ; GFX9-O0-NEXT:    s_addc_u32 s0, s0, s1
 ; GFX9-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9
 ; GFX9-O0-NEXT:    s_mov_b32 s9, s0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
 ; GFX9-O0-NEXT:    s_mov_b32 s0, 32
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr2_sgpr3
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[10:11], s0, v[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[11:12], s0, v[9:10]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v11
 ; GFX9-O0-NEXT:    s_getpc_b64 s[0:1]
 ; GFX9-O0-NEXT:    s_add_u32 s0, s0, called_i64@gotpcrel32@lo+4
 ; GFX9-O0-NEXT:    s_addc_u32 s1, s1, called_i64@gotpcrel32@hi+12
@@ -658,33 +625,25 @@ define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %ar
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v7
 ; GFX9-O0-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-O0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s0, v2, 4
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v2, 5
-; GFX9-O0-NEXT:    v_readlane_b32 s2, v2, 6
-; GFX9-O0-NEXT:    v_readlane_b32 s3, v2, 7
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v2, 8
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v2, 9
+; GFX9-O0-NEXT:    v_readlane_b32 s0, v8, 4
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v8, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s2, v8, 6
+; GFX9-O0-NEXT:    v_readlane_b32 s3, v8, 7
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v8, 8
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v8, 9
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
 ; GFX9-O0-NEXT:    v_add_co_u32_e64 v3, s[6:7], v3, v5
 ; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[6:7], v4, v6, s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 0
-; GFX9-O0-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT:    ; kill: killed $vgpr0
+; GFX9-O0-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], s4 offset:4
 ; GFX9-O0-NEXT:    s_endpgm
 ;
 ; GFX9-O3-LABEL: call_i64:
@@ -991,15 +950,10 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
 ; GFX9-O0-NEXT:    s_mov_b32 s19, 0xe00000
 ; GFX9-O0-NEXT:    s_add_u32 s16, s16, s4
 ; GFX9-O0-NEXT:    s_addc_u32 s17, s17, 0
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[12:13]
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s3, 0
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s3, 0
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s1
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v0, 0
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v5, 0
 ; GFX9-O0-NEXT:    ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
 ; GFX9-O0-NEXT:    s_mov_b32 s3, s1
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s3
@@ -1012,19 +966,19 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
 ; GFX9-O0-NEXT:    s_mov_b32 s5, s10
 ; GFX9-O0-NEXT:    s_mov_b32 s6, s9
 ; GFX9-O0-NEXT:    s_mov_b32 s7, s8
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s2, 1
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s3, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s0, 3
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s1, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s2, 1
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s3, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s0, 3
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s1, 4
 ; GFX9-O0-NEXT:    s_mov_b32 s0, 0
 ; GFX9-O0-NEXT:    s_nop 2
-; GFX9-O0-NEXT:    buffer_load_dwordx2 v[4:5], off, s[4:7], s0
+; GFX9-O0-NEXT:    buffer_load_dwordx2 v[3:4], off, s[4:7], s0
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr2_sgpr3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
@@ -1034,16 +988,16 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
 ; GFX9-O0-NEXT:    v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf
 ; GFX9-O0-NEXT:    v_add_u32_e64 v1, v1, v2
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[2:3]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[2:3], v3, s0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s0
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[2:3], v0, s0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 s[0:1], exec
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s0, 5
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s1, 6
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s0, 5
+; GFX9-O0-NEXT:    v_writelane_b32 v5, s1, 6
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill
+; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[16:19], 0 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[12:13]
 ; GFX9-O0-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[0:1]
@@ -1068,25 +1022,25 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
 ; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:  .LBB8_2: ; %merge
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[16:19], 0 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[12:13]
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v0, 5
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v0, 6
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v5, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v5, 6
 ; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX9-O0-NEXT:    v_readlane_b32 s2, v0, 1
-; GFX9-O0-NEXT:    v_readlane_b32 s3, v0, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s0, v0, 3
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v0, 4
-; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    v_readlane_b32 s2, v5, 1
+; GFX9-O0-NEXT:    v_readlane_b32 s3, v5, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s0, v5, 3
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v5, 4
+; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload
+; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v4
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s[4:5]
+; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v3
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 1
-; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v3, s4, v3
+; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v0, s4, v0
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 2
-; GFX9-O0-NEXT:    v_and_b32_e64 v3, v3, s4
+; GFX9-O0-NEXT:    v_and_b32_e64 v0, v0, s4
 ; GFX9-O0-NEXT:    s_mov_b32 s6, s1
 ; GFX9-O0-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
 ; GFX9-O0-NEXT:    s_mov_b32 s4, s3
@@ -1096,8 +1050,7 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
 ; GFX9-O0-NEXT:    s_mov_b32 s2, s5
 ; GFX9-O0-NEXT:    s_mov_b32 s3, s4
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 0
-; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT:    ; kill: killed $vgpr0
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s4 offset:4
 ; GFX9-O0-NEXT:    s_endpgm
 ;
 ; GFX9-O3-LABEL: strict_wwm_cfg:
@@ -1198,38 +1151,32 @@ define hidden i32 @strict_wwm_called(i32 %a) noinline {
 define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
 ; GFX9-O0-LABEL: strict_wwm_call:
 ; GFX9-O0:       ; %bb.0:
-; GFX9-O0-NEXT:    s_mov_b32 s32, 0x400
+; GFX9-O0-NEXT:    s_mov_b32 s32, 0
 ; GFX9-O0-NEXT:    s_mov_b32 s24, SCRATCH_RSRC_DWORD0
 ; GFX9-O0-NEXT:    s_mov_b32 s25, SCRATCH_RSRC_DWORD1
 ; GFX9-O0-NEXT:    s_mov_b32 s26, -1
 ; GFX9-O0-NEXT:    s_mov_b32 s27, 0xe00000
 ; GFX9-O0-NEXT:    s_add_u32 s24, s24, s9
 ; GFX9-O0-NEXT:    s_addc_u32 s25, s25, 0
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[10:11], -1
-; GFX9-O0-NEXT:    v_writelane_b32 v7, s10, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v7, s11, 1
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s10, 0
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s11, 1
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s13, s7
 ; GFX9-O0-NEXT:    s_mov_b32 s12, s6
 ; GFX9-O0-NEXT:    s_mov_b64 s[10:11], s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; GFX9-O0-NEXT:    v_readlane_b32 s2, v7, 0
-; GFX9-O0-NEXT:    v_readlane_b32 s3, v7, 1
-; GFX9-O0-NEXT:    v_writelane_b32 v7, s4, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v7, s5, 3
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT:    v_readlane_b32 s2, v3, 0
+; GFX9-O0-NEXT:    v_readlane_b32 s3, v3, 1
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s4, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s5, 3
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; GFX9-O0-NEXT:    v_readlane_b32 s0, v7, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v7, 3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT:    v_readlane_b32 s0, v3, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v3, 3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v0
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[2:3]
 ; GFX9-O0-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x24
 ; GFX9-O0-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x2c
@@ -1243,23 +1190,19 @@ define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 in
 ; GFX9-O0-NEXT:    s_mov_b32 s17, s7
 ; GFX9-O0-NEXT:    s_mov_b32 s18, s6
 ; GFX9-O0-NEXT:    s_mov_b32 s19, s3
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s16, 4
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s17, 5
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s18, 6
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s19, 7
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s16, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s17, 5
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s18, 6
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s19, 7
 ; GFX9-O0-NEXT:    s_mov_b32 s3, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s3, 8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s2
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s3, 8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s2
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[2:3], -1
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s2, 9
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s3, 10
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s2, 9
+; GFX9-O0-NEXT:    v_writelane_b32 v3, s3, 10
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 56
 ; GFX9-O0-NEXT:    s_mov_b32 s2, s0
 ; GFX9-O0-NEXT:    s_mov_b32 s0, s1
@@ -1275,35 +1218,28 @@ define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 in
 ; GFX9-O0-NEXT:    s_mov_b64 s[0:1], s[24:25]
 ; GFX9-O0-NEXT:    s_mov_b64 s[2:3], s[26:27]
 ; GFX9-O0-NEXT:    s_mov_b32 s6, 20
-; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v3, s6, v3
-; GFX9-O0-NEXT:    s_mov_b32 s6, 10
 ; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v4, s6, v4
-; GFX9-O0-NEXT:    v_or3_b32 v3, v5, v4, v3
+; GFX9-O0-NEXT:    s_mov_b32 s6, 10
+; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v5, s6, v5
+; GFX9-O0-NEXT:    v_or3_b32 v4, v6, v5, v4
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr15
-; GFX9-O0-NEXT:    v_mov_b32_e32 v31, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v31, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v7
 ; GFX9-O0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s0, v1, 4
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v1, 5
-; GFX9-O0-NEXT:    v_readlane_b32 s2, v1, 6
-; GFX9-O0-NEXT:    v_readlane_b32 s3, v1, 7
-; GFX9-O0-NEXT:    v_readlane_b32 s6, v1, 9
-; GFX9-O0-NEXT:    v_readlane_b32 s7, v1, 10
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v1, 8
+; GFX9-O0-NEXT:    v_readlane_b32 s0, v3, 4
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v3, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s2, v3, 6
+; GFX9-O0-NEXT:    v_readlane_b32 s3, v3, 7
+; GFX9-O0-NEXT:    v_readlane_b32 s6, v3, 9
+; GFX9-O0-NEXT:    v_readlane_b32 s7, v3, 10
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v3, 8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT:    v_add_u32_e64 v3, v3, v6
+; GFX9-O0-NEXT:    v_add_u32_e64 v3, v3, v7
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT:    ; kill: killed $vgpr0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v3
+; GFX9-O0-NEXT:    s_nop 0
+; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s4 offset:4
 ; GFX9-O0-NEXT:    s_endpgm
 ;
 ; GFX9-O3-LABEL: strict_wwm_call:
@@ -1433,38 +1369,32 @@ define i64 @strict_wwm_called_i64(i64 %a) noinline {
 define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %arg) {
 ; GFX9-O0-LABEL: strict_wwm_call_i64:
 ; GFX9-O0:       ; %bb.0:
-; GFX9-O0-NEXT:    s_mov_b32 s32, 0x400
+; GFX9-O0-NEXT:    s_mov_b32 s32, 0
 ; GFX9-O0-NEXT:    s_mov_b32 s24, SCRATCH_RSRC_DWORD0
 ; GFX9-O0-NEXT:    s_mov_b32 s25, SCRATCH_RSRC_DWORD1
 ; GFX9-O0-NEXT:    s_mov_b32 s26, -1
 ; GFX9-O0-NEXT:    s_mov_b32 s27, 0xe00000
 ; GFX9-O0-NEXT:    s_add_u32 s24, s24, s9
 ; GFX9-O0-NEXT:    s_addc_u32 s25, s25, 0
-; GFX9-O0-NEXT:    ; implicit-def: $vgpr12 : SGPR spill to VGPR lane
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[10:11], -1
-; GFX9-O0-NEXT:    v_writelane_b32 v12, s10, 0
-; GFX9-O0-NEXT:    v_writelane_b32 v12, s11, 1
+; GFX9-O0-NEXT:    ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s10, 0
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s11, 1
 ; GFX9-O0-NEXT:    s_mov_b32 s14, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s13, s7
 ; GFX9-O0-NEXT:    s_mov_b32 s12, s6
 ; GFX9-O0-NEXT:    s_mov_b64 s[10:11], s[4:5]
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; GFX9-O0-NEXT:    v_readlane_b32 s2, v12, 0
-; GFX9-O0-NEXT:    v_readlane_b32 s3, v12, 1
-; GFX9-O0-NEXT:    v_writelane_b32 v12, s4, 2
-; GFX9-O0-NEXT:    v_writelane_b32 v12, s5, 3
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT:    v_readlane_b32 s2, v8, 0
+; GFX9-O0-NEXT:    v_readlane_b32 s3, v8, 1
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s4, 2
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s5, 3
 ; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; GFX9-O0-NEXT:    v_readlane_b32 s0, v12, 2
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v12, 3
+; GFX9-O0-NEXT:    v_readlane_b32 s0, v8, 2
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v8, 3
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[2:3]
 ; GFX9-O0-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x24
 ; GFX9-O0-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x2c
@@ -1478,24 +1408,20 @@ define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i6
 ; GFX9-O0-NEXT:    s_mov_b32 s17, s8
 ; GFX9-O0-NEXT:    s_mov_b32 s18, s7
 ; GFX9-O0-NEXT:    s_mov_b32 s19, s6
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s16, 4
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s17, 5
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s18, 6
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s19, 7
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s16, 4
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s17, 5
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s18, 6
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s19, 7
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s2
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s3
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s7
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s6
+; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s7
 ; GFX9-O0-NEXT:    s_not_b64 exec, exec
 ; GFX9-O0-NEXT:    s_or_saveexec_b64 s[2:3], -1
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s2, 8
-; GFX9-O0-NEXT:    v_writelane_b32 v0, s3, 9
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s2, 8
+; GFX9-O0-NEXT:    v_writelane_b32 v8, s3, 9
 ; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 60
 ; GFX9-O0-NEXT:    s_mov_b32 s2, s0
 ; GFX9-O0-NEXT:    s_mov_b32 s0, s1
@@ -1505,11 +1431,11 @@ define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i6
 ; GFX9-O0-NEXT:    s_addc_u32 s0, s0, s1
 ; GFX9-O0-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9
 ; GFX9-O0-NEXT:    s_mov_b32 s9, s0
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
 ; GFX9-O0-NEXT:    s_mov_b32 s0, 32
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr2_sgpr3
-; GFX9-O0-NEXT:    v_lshrrev_b64 v[10:11], s0, v[8:9]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
+; GFX9-O0-NEXT:    v_lshrrev_b64 v[11:12], s0, v[9:10]
+; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v11
 ; GFX9-O0-NEXT:    s_getpc_b64 s[0:1]
 ; GFX9-O0-NEXT:    s_add_u32 s0, s0, strict_wwm_called_i64@gotpcrel32@lo+4
 ; GFX9-O0-NEXT:    s_addc_u32 s1, s1, strict_wwm_called_i64@gotpcrel32@hi+12
@@ -1528,33 +1454,25 @@ define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v7
 ; GFX9-O0-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-O0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT:    v_readlane_b32 s0, v2, 4
-; GFX9-O0-NEXT:    v_readlane_b32 s1, v2, 5
-; GFX9-O0-NEXT:    v_readlane_b32 s2, v2, 6
-; GFX9-O0-NEXT:    v_readlane_b32 s3, v2, 7
-; GFX9-O0-NEXT:    v_readlane_b32 s4, v2, 8
-; GFX9-O0-NEXT:    v_readlane_b32 s5, v2, 9
+; GFX9-O0-NEXT:    v_readlane_b32 s0, v8, 4
+; GFX9-O0-NEXT:    v_readlane_b32 s1, v8, 5
+; GFX9-O0-NEXT:    v_readlane_b32 s2, v8, 6
+; GFX9-O0-NEXT:    v_readlane_b32 s3, v8, 7
+; GFX9-O0-NEXT:    v_readlane_b32 s4, v8, 8
+; GFX9-O0-NEXT:    v_readlane_b32 s5, v8, 9
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    s_mov_b64 exec, s[20:21]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
-; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
 ; GFX9-O0-NEXT:    v_add_co_u32_e64 v3, s[6:7], v3, v5
 ; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[6:7], v4, v6, s[6:7]
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v4
+; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v3
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
 ; GFX9-O0-NEXT:    s_mov_b32 s4, 0
-; GFX9-O0-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT:    ; kill: killed $vgpr0
+; GFX9-O0-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], s4 offset:4
 ; GFX9-O0-NEXT:    s_endpgm
 ;
 ; GFX9-O3-LABEL: strict_wwm_call_i64:

>From b47f84d7b23a5fbf10a99eb1ff96abb55d135cc5 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Thu, 6 Jun 2024 10:26:56 +0530
Subject: [PATCH 03/13] Some fixups: comment cleanups, range-for loops, and wwm-regalloc failure handling

---
 llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp | 3 +--
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp  | 6 ++----
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp      | 6 +++---
 llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp    | 6 +++---
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp
index 5ed8cd4231d00..9066545138baa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp
@@ -1,5 +1,4 @@
-//===-- AMDGPUReserveWWMRegs.cpp - Add WWM Regs to the reserved regs list
-//---===//
+//===-- AMDGPUReserveWWMRegs.cpp - Add WWM Regs to reserved regs list -----===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e3375c758b8d5..6166be79c1682 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1456,8 +1456,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
   // Equivalent of PEI for SGPRs.
   addPass(&SILowerSGPRSpillsID);
 
-  // To Allocate wwm registers used in whole quad mode operations (for pixel
-  // shaders).
+  // To allocate wwm registers used in whole quad mode operations (for shaders).
   addPass(&SIPreAllocateWWMRegsID);
 
   // For allocating other wwm register operands.
@@ -1489,8 +1488,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
   // Equivalent of PEI for SGPRs.
   addPass(&SILowerSGPRSpillsID);
 
-  // To Allocate wwm registers used in whole quad mode operations (for pixel
-  // shaders).
+  // To allocate wwm registers used in whole quad mode operations (for shaders).
   addPass(&SIPreAllocateWWMRegsID);
 
   // For allocating other whole wave mode registers.
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index fbb6c3d9fe24b..15afe387731dc 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1559,7 +1559,7 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
 static void addWwmRegBBLiveIn(MachineFunction &MF) {
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   for (MachineBasicBlock &MBB : MF) {
-    for (auto &Reg : MFI->getWWMReservedRegs())
+    for (Register Reg : MFI->getWWMReservedRegs())
       MBB.addLiveIn(Reg);
 
     MBB.sortUniqueLiveIns();
@@ -1606,7 +1606,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   }
 
   SmallVector<Register> SortedWWMVGPRs;
-  for (auto &Reg : MFI->getWWMReservedRegs()) {
+  for (Register Reg : MFI->getWWMReservedRegs()) {
     // The shift-back is needed only for the VGPRs used for SGPR spills and they
     // are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into WWM
     // reserved registers.
@@ -1634,7 +1634,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   }
 
   // Create the stack objects for WWM registers now.
-  for (auto &Reg : MFI->getWWMReservedRegs())
+  for (Register Reg : MFI->getWWMReservedRegs())
     MFI->allocateWWMSpill(MF, Reg);
 
   // Ignore the SGPRs the default implementation found.
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index eb6a47e1a41d3..ceb6cfe598689 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -36,7 +36,7 @@ namespace {
 static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation(
     "amdgpu-num-vgprs-for-wwm-alloc",
     cl::desc("Max num VGPRs for whole-wave register allocation."),
-    cl::ReallyHidden, cl::init(5));
+    cl::ReallyHidden, cl::init(10));
 
 class SILowerSGPRSpills : public MachineFunctionPass {
 private:
@@ -349,8 +349,8 @@ void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
     }
   }
 
-  assert(I == NumRegs &&
-         "Failed to find enough VGPRs for whole-wave register allocation");
+  if (I != NumRegs)
+    report_fatal_error("Can't find enough VGPRs for wwm-regalloc");
 }
 
 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
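
For context, the wwm-allocation cap is an ordinary cl::opt, so it can still be
overridden per run via -amdgpu-num-vgprs-for-wwm-alloc=<n> on the llc command
line. Below is a minimal sketch of the flag plus the new failure path as of
this patch, assuming a hypothetical checkWwmRegCount() helper in place of the
in-tree SILowerSGPRSpills::determineRegsForWWMAllocation logic (a later patch
in the series turns the fatal error into an emitError diagnostic):

  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/ErrorHandling.h"
  using namespace llvm;

  // Same flag as in the patch; the default now leaves ten VGPRs for
  // whole-wave (wwm) register allocation instead of five.
  static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation(
      "amdgpu-num-vgprs-for-wwm-alloc",
      cl::desc("Max num VGPRs for whole-wave register allocation."),
      cl::ReallyHidden, cl::init(10));

  // Hypothetical stand-in for the failure path: if fewer candidate VGPRs
  // were found than requested, fail loudly instead of asserting.
  static void checkWwmRegCount(unsigned NumFound) {
    if (NumFound != MaxNumVGPRsForWwmAllocation)
      report_fatal_error("Can't find enough VGPRs for wwm-regalloc");
  }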

>From e49d3f865f6a40e45bccd043a5fd6bea20b0aafe Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Fri, 7 Jun 2024 16:42:46 +0530
Subject: [PATCH 04/13] Change report_fatal_error to emitError.

---
 llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index ceb6cfe598689..76b0517448ed5 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -349,8 +349,12 @@ void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
     }
   }
 
-  if (I != NumRegs)
-    report_fatal_error("Can't find enough VGPRs for wwm-regalloc");
+  if (I != NumRegs) {
+    // Reserve an arbitrary register and report the error.
+    TRI->markSuperRegs(RegMask, AMDGPU::VGPR0);
+    MF.getFunction().getContext().emitError(
+        "can't find enough VGPRs for wwm-regalloc");
+  }
 }
 
 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
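
The practical difference here: report_fatal_error() takes the crash-reporting
path and terminates immediately, while LLVMContext::emitError() hands the
message to the context's diagnostic handler, so a driver that installs one
(llc, clang) can print a clean error and fail the compile instead. That is
also why the patch reserves VGPR0 first, since the pass keeps running after
the diagnostic. A minimal sketch under those assumptions, with a hypothetical
diagnoseWwmShortage() helper standing in for the in-tree code:

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;

  // Emit a per-function error through the context's diagnostic handler.
  // Unlike report_fatal_error, this avoids the crash-reporting path.
  static void diagnoseWwmShortage(MachineFunction &MF) {
    LLVMContext &Ctx = MF.getFunction().getContext();
    Ctx.emitError("can't find enough VGPRs for wwm-regalloc");
    // With a diagnostic handler installed (as llc and clang do), execution
    // continues here, so the pass must leave its own state usable, e.g.
    // the reserved-register mask it was asked to populate.
  }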

>From 08cc2fcd65eb56d24c1decb2134bbce6cbd1dda0 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Tue, 11 Jun 2024 11:11:45 +0530
Subject: [PATCH 05/13] Fix up the XFAILed lit tests.

---
 llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll |  362 ++----
 .../AMDGPU/partial-sgpr-to-vgpr-spills.ll     | 1088 ++++++++---------
 .../scc-clobbered-sgpr-to-vmem-spill.ll       |  461 ++++---
 ...sgpr-spill-incorrect-fi-bookkeeping-bug.ll |  361 +++++-
 .../CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll     |  297 +++--
 5 files changed, 1349 insertions(+), 1220 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index 66af18ac7b167..1fb50959987df 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLAT_SCR_OPT %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLAT_SCR_ARCH %s
 
-; XFAIL: *
-
 declare void @extern_func() #0
 
 define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
@@ -109,256 +107,116 @@ define amdgpu_kernel void @kernel_calls_no_stack() {
   ret void
 }
 
+; One VGPR was left free (VGPR0) for whole-wave register allocation.
 define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
-; FLAT_SCR_OPT-LABEL: test:
-; FLAT_SCR_OPT:       ; %bb.0:
-; FLAT_SCR_OPT-NEXT:    s_add_u32 s2, s2, s5
-; FLAT_SCR_OPT-NEXT:    s_addc_u32 s3, s3, 0
-; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
-; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-; FLAT_SCR_OPT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; FLAT_SCR_OPT-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; FLAT_SCR_OPT-NEXT:    s_waitcnt lgkmcnt(0)
-; FLAT_SCR_OPT-NEXT:    v_writelane_b32 v0, s2, 0
-; FLAT_SCR_OPT-NEXT:    v_writelane_b32 v0, s3, 1
-; FLAT_SCR_OPT-NEXT:    s_or_saveexec_b32 s105, -1
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 s2, 0
-; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v0, s2 ; 4-byte Folded Spill
-; FLAT_SCR_OPT-NEXT:    s_waitcnt_depctr 0xffe3
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 exec_lo, s105
-; FLAT_SCR_OPT-NEXT:    s_load_dword vcc_lo, s[0:1], 0x8
-; FLAT_SCR_OPT-NEXT:    ; kill: killed $sgpr0_sgpr1
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    s_waitcnt lgkmcnt(0)
-; FLAT_SCR_OPT-NEXT:    v_mov_b32_e32 v0, vcc_lo
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT:    ;;#ASMEND
-; FLAT_SCR_OPT-NEXT:    s_or_saveexec_b32 s105, -1
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 s0, 0
-; FLAT_SCR_OPT-NEXT:    scratch_load_dword v1, off, s0 ; 4-byte Folded Reload
-; FLAT_SCR_OPT-NEXT:    s_waitcnt_depctr 0xffe3
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 exec_lo, s105
-; FLAT_SCR_OPT-NEXT:    s_waitcnt vmcnt(0)
-; FLAT_SCR_OPT-NEXT:    v_readlane_b32 s0, v1, 0
-; FLAT_SCR_OPT-NEXT:    v_readlane_b32 s1, v1, 1
-; FLAT_SCR_OPT-NEXT:    s_or_saveexec_b32 s105, -1
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 exec_lo, s105
-; FLAT_SCR_OPT-NEXT:    v_mov_b32_e32 v2, 0
-; FLAT_SCR_OPT-NEXT:    ; kill: killed $vgpr1
-; FLAT_SCR_OPT-NEXT:    global_store_dword v2, v0, s[0:1]
-; FLAT_SCR_OPT-NEXT:    s_endpgm
-;
-; FLAT_SCR_ARCH-LABEL: test:
-; FLAT_SCR_ARCH:       ; %bb.0:
-; FLAT_SCR_ARCH-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; FLAT_SCR_ARCH-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; FLAT_SCR_ARCH-NEXT:    s_waitcnt lgkmcnt(0)
-; FLAT_SCR_ARCH-NEXT:    v_writelane_b32 v0, s2, 0
-; FLAT_SCR_ARCH-NEXT:    v_writelane_b32 v0, s3, 1
-; FLAT_SCR_ARCH-NEXT:    s_or_saveexec_b32 s105, -1
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s2, 0
-; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v0, s2 ; 4-byte Folded Spill
-; FLAT_SCR_ARCH-NEXT:    s_waitcnt_depctr 0xffe3
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 exec_lo, s105
-; FLAT_SCR_ARCH-NEXT:    s_load_dword vcc_lo, s[0:1], 0x8
-; FLAT_SCR_ARCH-NEXT:    ; kill: killed $sgpr0_sgpr1
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    s_waitcnt lgkmcnt(0)
-; FLAT_SCR_ARCH-NEXT:    v_mov_b32_e32 v0, vcc_lo
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT:    ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT:    s_or_saveexec_b32 s105, -1
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s0, 0
-; FLAT_SCR_ARCH-NEXT:    scratch_load_dword v1, off, s0 ; 4-byte Folded Reload
-; FLAT_SCR_ARCH-NEXT:    s_waitcnt_depctr 0xffe3
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 exec_lo, s105
-; FLAT_SCR_ARCH-NEXT:    s_waitcnt vmcnt(0)
-; FLAT_SCR_ARCH-NEXT:    v_readlane_b32 s0, v1, 0
-; FLAT_SCR_ARCH-NEXT:    v_readlane_b32 s1, v1, 1
-; FLAT_SCR_ARCH-NEXT:    s_or_saveexec_b32 s105, -1
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 exec_lo, s105
-; FLAT_SCR_ARCH-NEXT:    v_mov_b32_e32 v2, 0
-; FLAT_SCR_ARCH-NEXT:    ; kill: killed $vgpr1
-; FLAT_SCR_ARCH-NEXT:    global_store_dword v2, v0, s[0:1]
-; FLAT_SCR_ARCH-NEXT:    s_endpgm
+; GCN-LABEL: test:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GCN-NEXT:    s_load_dword vcc_lo, s[0:1], 0x8
+; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT:    ; kill: killed $sgpr0_sgpr1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_writelane_b32 v0, s2, 0
+; GCN-NEXT:    v_writelane_b32 v0, s3, 1
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_readlane_b32 s0, v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, vcc_lo
+; GCN-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-NEXT:    v_mov_b32_e32 v2, 0
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    global_store_dword v2, v1, s[0:1]
+; GCN-NEXT:    s_endpgm
   call void asm sideeffect "", "~{s[0:7]}" ()
   call void asm sideeffect "", "~{s[8:15]}" ()
   call void asm sideeffect "", "~{s[16:23]}" ()
@@ -373,7 +231,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
   call void asm sideeffect "", "~{s[88:95]}" ()
   call void asm sideeffect "", "~{s[96:103]}" ()
   call void asm sideeffect "", "~{s[104:105]}" ()
-  call void asm sideeffect "", "~{v[0:7]}" ()
+  call void asm sideeffect "", "~{v[1:7]}" ()
   call void asm sideeffect "", "~{v[8:15]}" ()
   call void asm sideeffect "", "~{v[16:23]}" ()
   call void asm sideeffect "", "~{v[24:31]}" ()
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index e00eb0a582e63..078b133a93d6f 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -O0 -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-; XFAIL: *
-
 ; FIXME: we should disable sdwa peephole because dead-code elimination, that
 ; runs after peephole, ruins this test (different register numbers)
 
@@ -19,13 +17,11 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
 ; GCN-NEXT:    s_mov_b32 s95, 0xe8f000
 ; GCN-NEXT:    s_add_u32 s92, s92, s9
 ; GCN-NEXT:    s_addc_u32 s93, s93, 0
-; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; GCN-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; GCN-NEXT:    s_load_dword s0, s[2:3], 0xb
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; GCN-NEXT:    v_writelane_b32 v2, s4, 0
 ; GCN-NEXT:    v_writelane_b32 v2, s5, 1
 ; GCN-NEXT:    v_writelane_b32 v2, s6, 2
@@ -117,107 +113,109 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 0
-; GCN-NEXT:    v_writelane_b32 v1, s5, 1
-; GCN-NEXT:    v_writelane_b32 v1, s6, 2
-; GCN-NEXT:    v_writelane_b32 v1, s7, 3
-; GCN-NEXT:    v_writelane_b32 v1, s8, 4
-; GCN-NEXT:    v_writelane_b32 v1, s9, 5
-; GCN-NEXT:    v_writelane_b32 v1, s10, 6
-; GCN-NEXT:    v_writelane_b32 v1, s11, 7
+; GCN-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
+; GCN-NEXT:    v_writelane_b32 v2, s4, 0
+; GCN-NEXT:    v_writelane_b32 v2, s5, 1
+; GCN-NEXT:    v_writelane_b32 v2, s6, 2
+; GCN-NEXT:    v_writelane_b32 v2, s7, 3
+; GCN-NEXT:    v_writelane_b32 v2, s8, 4
+; GCN-NEXT:    v_writelane_b32 v2, s9, 5
+; GCN-NEXT:    v_writelane_b32 v2, s10, 6
+; GCN-NEXT:    v_writelane_b32 v2, s11, 7
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 8
-; GCN-NEXT:    v_writelane_b32 v1, s5, 9
-; GCN-NEXT:    v_writelane_b32 v1, s6, 10
-; GCN-NEXT:    v_writelane_b32 v1, s7, 11
-; GCN-NEXT:    v_writelane_b32 v1, s8, 12
-; GCN-NEXT:    v_writelane_b32 v1, s9, 13
-; GCN-NEXT:    v_writelane_b32 v1, s10, 14
-; GCN-NEXT:    v_writelane_b32 v1, s11, 15
+; GCN-NEXT:    v_writelane_b32 v2, s4, 8
+; GCN-NEXT:    v_writelane_b32 v2, s5, 9
+; GCN-NEXT:    v_writelane_b32 v2, s6, 10
+; GCN-NEXT:    v_writelane_b32 v2, s7, 11
+; GCN-NEXT:    v_writelane_b32 v2, s8, 12
+; GCN-NEXT:    v_writelane_b32 v2, s9, 13
+; GCN-NEXT:    v_writelane_b32 v2, s10, 14
+; GCN-NEXT:    v_writelane_b32 v2, s11, 15
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 16
-; GCN-NEXT:    v_writelane_b32 v1, s5, 17
-; GCN-NEXT:    v_writelane_b32 v1, s6, 18
-; GCN-NEXT:    v_writelane_b32 v1, s7, 19
-; GCN-NEXT:    v_writelane_b32 v1, s8, 20
-; GCN-NEXT:    v_writelane_b32 v1, s9, 21
-; GCN-NEXT:    v_writelane_b32 v1, s10, 22
-; GCN-NEXT:    v_writelane_b32 v1, s11, 23
+; GCN-NEXT:    v_writelane_b32 v2, s4, 16
+; GCN-NEXT:    v_writelane_b32 v2, s5, 17
+; GCN-NEXT:    v_writelane_b32 v2, s6, 18
+; GCN-NEXT:    v_writelane_b32 v2, s7, 19
+; GCN-NEXT:    v_writelane_b32 v2, s8, 20
+; GCN-NEXT:    v_writelane_b32 v2, s9, 21
+; GCN-NEXT:    v_writelane_b32 v2, s10, 22
+; GCN-NEXT:    v_writelane_b32 v2, s11, 23
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 24
-; GCN-NEXT:    v_writelane_b32 v1, s5, 25
-; GCN-NEXT:    v_writelane_b32 v1, s6, 26
-; GCN-NEXT:    v_writelane_b32 v1, s7, 27
-; GCN-NEXT:    v_writelane_b32 v1, s8, 28
-; GCN-NEXT:    v_writelane_b32 v1, s9, 29
-; GCN-NEXT:    v_writelane_b32 v1, s10, 30
-; GCN-NEXT:    v_writelane_b32 v1, s11, 31
+; GCN-NEXT:    v_writelane_b32 v2, s4, 24
+; GCN-NEXT:    v_writelane_b32 v2, s5, 25
+; GCN-NEXT:    v_writelane_b32 v2, s6, 26
+; GCN-NEXT:    v_writelane_b32 v2, s7, 27
+; GCN-NEXT:    v_writelane_b32 v2, s8, 28
+; GCN-NEXT:    v_writelane_b32 v2, s9, 29
+; GCN-NEXT:    v_writelane_b32 v2, s10, 30
+; GCN-NEXT:    v_writelane_b32 v2, s11, 31
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 32
-; GCN-NEXT:    v_writelane_b32 v1, s5, 33
-; GCN-NEXT:    v_writelane_b32 v1, s6, 34
-; GCN-NEXT:    v_writelane_b32 v1, s7, 35
-; GCN-NEXT:    v_writelane_b32 v1, s8, 36
-; GCN-NEXT:    v_writelane_b32 v1, s9, 37
-; GCN-NEXT:    v_writelane_b32 v1, s10, 38
-; GCN-NEXT:    v_writelane_b32 v1, s11, 39
+; GCN-NEXT:    v_writelane_b32 v2, s4, 32
+; GCN-NEXT:    v_writelane_b32 v2, s5, 33
+; GCN-NEXT:    v_writelane_b32 v2, s6, 34
+; GCN-NEXT:    v_writelane_b32 v2, s7, 35
+; GCN-NEXT:    v_writelane_b32 v2, s8, 36
+; GCN-NEXT:    v_writelane_b32 v2, s9, 37
+; GCN-NEXT:    v_writelane_b32 v2, s10, 38
+; GCN-NEXT:    v_writelane_b32 v2, s11, 39
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 40
-; GCN-NEXT:    v_writelane_b32 v1, s5, 41
-; GCN-NEXT:    v_writelane_b32 v1, s6, 42
-; GCN-NEXT:    v_writelane_b32 v1, s7, 43
-; GCN-NEXT:    v_writelane_b32 v1, s8, 44
-; GCN-NEXT:    v_writelane_b32 v1, s9, 45
-; GCN-NEXT:    v_writelane_b32 v1, s10, 46
-; GCN-NEXT:    v_writelane_b32 v1, s11, 47
+; GCN-NEXT:    v_writelane_b32 v2, s4, 40
+; GCN-NEXT:    v_writelane_b32 v2, s5, 41
+; GCN-NEXT:    v_writelane_b32 v2, s6, 42
+; GCN-NEXT:    v_writelane_b32 v2, s7, 43
+; GCN-NEXT:    v_writelane_b32 v2, s8, 44
+; GCN-NEXT:    v_writelane_b32 v2, s9, 45
+; GCN-NEXT:    v_writelane_b32 v2, s10, 46
+; GCN-NEXT:    v_writelane_b32 v2, s11, 47
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 48
-; GCN-NEXT:    v_writelane_b32 v1, s5, 49
-; GCN-NEXT:    v_writelane_b32 v1, s6, 50
-; GCN-NEXT:    v_writelane_b32 v1, s7, 51
-; GCN-NEXT:    v_writelane_b32 v1, s8, 52
-; GCN-NEXT:    v_writelane_b32 v1, s9, 53
-; GCN-NEXT:    v_writelane_b32 v1, s10, 54
-; GCN-NEXT:    v_writelane_b32 v1, s11, 55
+; GCN-NEXT:    v_writelane_b32 v2, s4, 48
+; GCN-NEXT:    v_writelane_b32 v2, s5, 49
+; GCN-NEXT:    v_writelane_b32 v2, s6, 50
+; GCN-NEXT:    v_writelane_b32 v2, s7, 51
+; GCN-NEXT:    v_writelane_b32 v2, s8, 52
+; GCN-NEXT:    v_writelane_b32 v2, s9, 53
+; GCN-NEXT:    v_writelane_b32 v2, s10, 54
+; GCN-NEXT:    v_writelane_b32 v2, s11, 55
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 56
-; GCN-NEXT:    v_writelane_b32 v1, s5, 57
-; GCN-NEXT:    v_writelane_b32 v1, s6, 58
-; GCN-NEXT:    v_writelane_b32 v1, s7, 59
-; GCN-NEXT:    v_writelane_b32 v1, s8, 60
-; GCN-NEXT:    v_writelane_b32 v1, s9, 61
-; GCN-NEXT:    v_writelane_b32 v1, s10, 62
-; GCN-NEXT:    v_writelane_b32 v1, s11, 63
+; GCN-NEXT:    v_writelane_b32 v2, s4, 56
+; GCN-NEXT:    v_writelane_b32 v2, s5, 57
+; GCN-NEXT:    v_writelane_b32 v2, s6, 58
+; GCN-NEXT:    v_writelane_b32 v2, s7, 59
+; GCN-NEXT:    v_writelane_b32 v2, s8, 60
+; GCN-NEXT:    v_writelane_b32 v2, s9, 61
+; GCN-NEXT:    v_writelane_b32 v2, s10, 62
+; GCN-NEXT:    v_writelane_b32 v2, s11, 63
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_store_dword v1, off, s[92:95], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v2, off, s[92:95], 0 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v0, s4, 0
-; GCN-NEXT:    v_writelane_b32 v0, s5, 1
-; GCN-NEXT:    v_writelane_b32 v0, s6, 2
-; GCN-NEXT:    v_writelane_b32 v0, s7, 3
-; GCN-NEXT:    v_writelane_b32 v0, s8, 4
-; GCN-NEXT:    v_writelane_b32 v0, s9, 5
-; GCN-NEXT:    v_writelane_b32 v0, s10, 6
-; GCN-NEXT:    v_writelane_b32 v0, s11, 7
+; GCN-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
+; GCN-NEXT:    v_writelane_b32 v2, s4, 0
+; GCN-NEXT:    v_writelane_b32 v2, s5, 1
+; GCN-NEXT:    v_writelane_b32 v2, s6, 2
+; GCN-NEXT:    v_writelane_b32 v2, s7, 3
+; GCN-NEXT:    v_writelane_b32 v2, s8, 4
+; GCN-NEXT:    v_writelane_b32 v2, s9, 5
+; GCN-NEXT:    v_writelane_b32 v2, s10, 6
+; GCN-NEXT:    v_writelane_b32 v2, s11, 7
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_store_dword v0, off, s[92:95], 0 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v2, off, s[92:95], 0 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    s_mov_b32 s1, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
@@ -225,76 +223,76 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
 ; GCN-NEXT:    s_cbranch_scc1 .LBB0_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v2, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
 ; GCN-NEXT:    buffer_load_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    s_waitcnt vmcnt(1)
-; GCN-NEXT:    v_readlane_b32 s8, v2, 56
-; GCN-NEXT:    v_readlane_b32 s9, v2, 57
-; GCN-NEXT:    v_readlane_b32 s10, v2, 58
-; GCN-NEXT:    v_readlane_b32 s11, v2, 59
-; GCN-NEXT:    v_readlane_b32 s12, v2, 60
-; GCN-NEXT:    v_readlane_b32 s13, v2, 61
-; GCN-NEXT:    v_readlane_b32 s14, v2, 62
-; GCN-NEXT:    v_readlane_b32 s15, v2, 63
-; GCN-NEXT:    v_readlane_b32 s16, v2, 48
-; GCN-NEXT:    v_readlane_b32 s17, v2, 49
-; GCN-NEXT:    v_readlane_b32 s18, v2, 50
-; GCN-NEXT:    v_readlane_b32 s19, v2, 51
-; GCN-NEXT:    v_readlane_b32 s20, v2, 52
-; GCN-NEXT:    v_readlane_b32 s21, v2, 53
-; GCN-NEXT:    v_readlane_b32 s22, v2, 54
-; GCN-NEXT:    v_readlane_b32 s23, v2, 55
-; GCN-NEXT:    v_readlane_b32 s24, v2, 40
-; GCN-NEXT:    v_readlane_b32 s25, v2, 41
-; GCN-NEXT:    v_readlane_b32 s26, v2, 42
-; GCN-NEXT:    v_readlane_b32 s27, v2, 43
-; GCN-NEXT:    v_readlane_b32 s28, v2, 44
-; GCN-NEXT:    v_readlane_b32 s29, v2, 45
-; GCN-NEXT:    v_readlane_b32 s30, v2, 46
-; GCN-NEXT:    v_readlane_b32 s31, v2, 47
-; GCN-NEXT:    v_readlane_b32 s36, v2, 32
-; GCN-NEXT:    v_readlane_b32 s37, v2, 33
-; GCN-NEXT:    v_readlane_b32 s38, v2, 34
-; GCN-NEXT:    v_readlane_b32 s39, v2, 35
-; GCN-NEXT:    v_readlane_b32 s40, v2, 36
-; GCN-NEXT:    v_readlane_b32 s41, v2, 37
-; GCN-NEXT:    v_readlane_b32 s42, v2, 38
-; GCN-NEXT:    v_readlane_b32 s43, v2, 39
-; GCN-NEXT:    v_readlane_b32 s44, v2, 24
-; GCN-NEXT:    v_readlane_b32 s45, v2, 25
-; GCN-NEXT:    v_readlane_b32 s46, v2, 26
-; GCN-NEXT:    v_readlane_b32 s47, v2, 27
-; GCN-NEXT:    v_readlane_b32 s48, v2, 28
-; GCN-NEXT:    v_readlane_b32 s49, v2, 29
-; GCN-NEXT:    v_readlane_b32 s50, v2, 30
-; GCN-NEXT:    v_readlane_b32 s51, v2, 31
-; GCN-NEXT:    v_readlane_b32 s52, v2, 16
-; GCN-NEXT:    v_readlane_b32 s53, v2, 17
-; GCN-NEXT:    v_readlane_b32 s54, v2, 18
-; GCN-NEXT:    v_readlane_b32 s55, v2, 19
-; GCN-NEXT:    v_readlane_b32 s56, v2, 20
-; GCN-NEXT:    v_readlane_b32 s57, v2, 21
-; GCN-NEXT:    v_readlane_b32 s58, v2, 22
-; GCN-NEXT:    v_readlane_b32 s59, v2, 23
-; GCN-NEXT:    v_readlane_b32 s60, v2, 8
-; GCN-NEXT:    v_readlane_b32 s61, v2, 9
-; GCN-NEXT:    v_readlane_b32 s62, v2, 10
-; GCN-NEXT:    v_readlane_b32 s63, v2, 11
-; GCN-NEXT:    v_readlane_b32 s64, v2, 12
-; GCN-NEXT:    v_readlane_b32 s65, v2, 13
-; GCN-NEXT:    v_readlane_b32 s66, v2, 14
-; GCN-NEXT:    v_readlane_b32 s67, v2, 15
-; GCN-NEXT:    v_readlane_b32 s68, v2, 0
-; GCN-NEXT:    v_readlane_b32 s69, v2, 1
-; GCN-NEXT:    v_readlane_b32 s70, v2, 2
-; GCN-NEXT:    v_readlane_b32 s71, v2, 3
-; GCN-NEXT:    v_readlane_b32 s72, v2, 4
-; GCN-NEXT:    v_readlane_b32 s73, v2, 5
-; GCN-NEXT:    v_readlane_b32 s74, v2, 6
-; GCN-NEXT:    v_readlane_b32 s75, v2, 7
+; GCN-NEXT:    v_readlane_b32 s8, v0, 56
+; GCN-NEXT:    v_readlane_b32 s9, v0, 57
+; GCN-NEXT:    v_readlane_b32 s10, v0, 58
+; GCN-NEXT:    v_readlane_b32 s11, v0, 59
+; GCN-NEXT:    v_readlane_b32 s12, v0, 60
+; GCN-NEXT:    v_readlane_b32 s13, v0, 61
+; GCN-NEXT:    v_readlane_b32 s14, v0, 62
+; GCN-NEXT:    v_readlane_b32 s15, v0, 63
+; GCN-NEXT:    v_readlane_b32 s16, v0, 48
+; GCN-NEXT:    v_readlane_b32 s17, v0, 49
+; GCN-NEXT:    v_readlane_b32 s18, v0, 50
+; GCN-NEXT:    v_readlane_b32 s19, v0, 51
+; GCN-NEXT:    v_readlane_b32 s20, v0, 52
+; GCN-NEXT:    v_readlane_b32 s21, v0, 53
+; GCN-NEXT:    v_readlane_b32 s22, v0, 54
+; GCN-NEXT:    v_readlane_b32 s23, v0, 55
+; GCN-NEXT:    v_readlane_b32 s24, v0, 40
+; GCN-NEXT:    v_readlane_b32 s25, v0, 41
+; GCN-NEXT:    v_readlane_b32 s26, v0, 42
+; GCN-NEXT:    v_readlane_b32 s27, v0, 43
+; GCN-NEXT:    v_readlane_b32 s28, v0, 44
+; GCN-NEXT:    v_readlane_b32 s29, v0, 45
+; GCN-NEXT:    v_readlane_b32 s30, v0, 46
+; GCN-NEXT:    v_readlane_b32 s31, v0, 47
+; GCN-NEXT:    v_readlane_b32 s36, v0, 32
+; GCN-NEXT:    v_readlane_b32 s37, v0, 33
+; GCN-NEXT:    v_readlane_b32 s38, v0, 34
+; GCN-NEXT:    v_readlane_b32 s39, v0, 35
+; GCN-NEXT:    v_readlane_b32 s40, v0, 36
+; GCN-NEXT:    v_readlane_b32 s41, v0, 37
+; GCN-NEXT:    v_readlane_b32 s42, v0, 38
+; GCN-NEXT:    v_readlane_b32 s43, v0, 39
+; GCN-NEXT:    v_readlane_b32 s44, v0, 24
+; GCN-NEXT:    v_readlane_b32 s45, v0, 25
+; GCN-NEXT:    v_readlane_b32 s46, v0, 26
+; GCN-NEXT:    v_readlane_b32 s47, v0, 27
+; GCN-NEXT:    v_readlane_b32 s48, v0, 28
+; GCN-NEXT:    v_readlane_b32 s49, v0, 29
+; GCN-NEXT:    v_readlane_b32 s50, v0, 30
+; GCN-NEXT:    v_readlane_b32 s51, v0, 31
+; GCN-NEXT:    v_readlane_b32 s52, v0, 16
+; GCN-NEXT:    v_readlane_b32 s53, v0, 17
+; GCN-NEXT:    v_readlane_b32 s54, v0, 18
+; GCN-NEXT:    v_readlane_b32 s55, v0, 19
+; GCN-NEXT:    v_readlane_b32 s56, v0, 20
+; GCN-NEXT:    v_readlane_b32 s57, v0, 21
+; GCN-NEXT:    v_readlane_b32 s58, v0, 22
+; GCN-NEXT:    v_readlane_b32 s59, v0, 23
+; GCN-NEXT:    v_readlane_b32 s60, v0, 8
+; GCN-NEXT:    v_readlane_b32 s61, v0, 9
+; GCN-NEXT:    v_readlane_b32 s62, v0, 10
+; GCN-NEXT:    v_readlane_b32 s63, v0, 11
+; GCN-NEXT:    v_readlane_b32 s64, v0, 12
+; GCN-NEXT:    v_readlane_b32 s65, v0, 13
+; GCN-NEXT:    v_readlane_b32 s66, v0, 14
+; GCN-NEXT:    v_readlane_b32 s67, v0, 15
+; GCN-NEXT:    v_readlane_b32 s68, v0, 0
+; GCN-NEXT:    v_readlane_b32 s69, v0, 1
+; GCN-NEXT:    v_readlane_b32 s70, v0, 2
+; GCN-NEXT:    v_readlane_b32 s71, v0, 3
+; GCN-NEXT:    v_readlane_b32 s72, v0, 4
+; GCN-NEXT:    v_readlane_b32 s73, v0, 5
+; GCN-NEXT:    v_readlane_b32 s74, v0, 6
+; GCN-NEXT:    v_readlane_b32 s75, v0, 7
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readlane_b32 s76, v1, 56
 ; GCN-NEXT:    v_readlane_b32 s77, v1, 57
@@ -321,7 +319,7 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
 ; GCN-NEXT:    v_readlane_b32 s6, v1, 6
 ; GCN-NEXT:    v_readlane_b32 s7, v1, 7
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[92:95], 0 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v2, off, s[92:95], 0 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:7]
@@ -382,14 +380,14 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
 ; GCN-NEXT:    ; use s[0:7]
 ; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_readlane_b32 s0, v0, 0
-; GCN-NEXT:    v_readlane_b32 s1, v0, 1
-; GCN-NEXT:    v_readlane_b32 s2, v0, 2
-; GCN-NEXT:    v_readlane_b32 s3, v0, 3
-; GCN-NEXT:    v_readlane_b32 s4, v0, 4
-; GCN-NEXT:    v_readlane_b32 s5, v0, 5
-; GCN-NEXT:    v_readlane_b32 s6, v0, 6
-; GCN-NEXT:    v_readlane_b32 s7, v0, 7
+; GCN-NEXT:    v_readlane_b32 s0, v2, 0
+; GCN-NEXT:    v_readlane_b32 s1, v2, 1
+; GCN-NEXT:    v_readlane_b32 s2, v2, 2
+; GCN-NEXT:    v_readlane_b32 s3, v2, 3
+; GCN-NEXT:    v_readlane_b32 s4, v2, 4
+; GCN-NEXT:    v_readlane_b32 s5, v2, 5
+; GCN-NEXT:    v_readlane_b32 s6, v2, 6
+; GCN-NEXT:    v_readlane_b32 s7, v2, 7
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[84:91]
 ; GCN-NEXT:    ;;#ASMEND
@@ -424,18 +422,6 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
 ; GCN-NEXT:    ; use s[0:7]
 ; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:  .LBB0_2: ; %ret
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[92:95], 0 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v2, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    ; kill: killed $vgpr2
-; GCN-NEXT:    ; kill: killed $vgpr1
-; GCN-NEXT:    ; kill: killed $vgpr0
 ; GCN-NEXT:    s_endpgm
   %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
   %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
@@ -492,12 +478,11 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    s_mov_b32 s55, 0xe8f000
 ; GCN-NEXT:    s_add_u32 s52, s52, s9
 ; GCN-NEXT:    s_addc_u32 s53, s53, 0
-; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; GCN-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-NEXT:    s_load_dword s0, s[2:3], 0xb
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-NEXT:    v_writelane_b32 v1, s4, 0
 ; GCN-NEXT:    v_writelane_b32 v1, s5, 1
 ; GCN-NEXT:    v_writelane_b32 v1, s6, 2
@@ -577,21 +562,22 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v0, s4, 0
-; GCN-NEXT:    v_writelane_b32 v0, s5, 1
-; GCN-NEXT:    v_writelane_b32 v0, s6, 2
-; GCN-NEXT:    v_writelane_b32 v0, s7, 3
-; GCN-NEXT:    v_writelane_b32 v0, s8, 4
-; GCN-NEXT:    v_writelane_b32 v0, s9, 5
-; GCN-NEXT:    v_writelane_b32 v0, s10, 6
-; GCN-NEXT:    v_writelane_b32 v0, s11, 7
+; GCN-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; GCN-NEXT:    v_writelane_b32 v1, s4, 0
+; GCN-NEXT:    v_writelane_b32 v1, s5, 1
+; GCN-NEXT:    v_writelane_b32 v1, s6, 2
+; GCN-NEXT:    v_writelane_b32 v1, s7, 3
+; GCN-NEXT:    v_writelane_b32 v1, s8, 4
+; GCN-NEXT:    v_writelane_b32 v1, s9, 5
+; GCN-NEXT:    v_writelane_b32 v1, s10, 6
+; GCN-NEXT:    v_writelane_b32 v1, s11, 7
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[2:3]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v0, s2, 8
-; GCN-NEXT:    v_writelane_b32 v0, s3, 9
+; GCN-NEXT:    v_writelane_b32 v1, s2, 8
+; GCN-NEXT:    v_writelane_b32 v1, s3, 9
 ; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT:    buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v1, off, s[52:55], 0 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[28:29]
 ; GCN-NEXT:    s_mov_b32 s1, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
@@ -599,93 +585,93 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    s_cbranch_scc1 .LBB1_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
 ; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[28:29]
 ; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[28:29]
 ; GCN-NEXT:    s_waitcnt vmcnt(1)
-; GCN-NEXT:    v_readlane_b32 s16, v1, 8
-; GCN-NEXT:    v_readlane_b32 s17, v1, 9
-; GCN-NEXT:    v_readlane_b32 s20, v1, 0
-; GCN-NEXT:    v_readlane_b32 s21, v1, 1
-; GCN-NEXT:    v_readlane_b32 s22, v1, 2
-; GCN-NEXT:    v_readlane_b32 s23, v1, 3
-; GCN-NEXT:    v_readlane_b32 s24, v1, 4
-; GCN-NEXT:    v_readlane_b32 s25, v1, 5
-; GCN-NEXT:    v_readlane_b32 s26, v1, 6
-; GCN-NEXT:    v_readlane_b32 s27, v1, 7
+; GCN-NEXT:    v_readlane_b32 s16, v0, 8
+; GCN-NEXT:    v_readlane_b32 s17, v0, 9
+; GCN-NEXT:    v_readlane_b32 s20, v0, 0
+; GCN-NEXT:    v_readlane_b32 s21, v0, 1
+; GCN-NEXT:    v_readlane_b32 s22, v0, 2
+; GCN-NEXT:    v_readlane_b32 s23, v0, 3
+; GCN-NEXT:    v_readlane_b32 s24, v0, 4
+; GCN-NEXT:    v_readlane_b32 s25, v0, 5
+; GCN-NEXT:    v_readlane_b32 s26, v0, 6
+; GCN-NEXT:    v_readlane_b32 s27, v0, 7
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_readlane_b32 s36, v0, 32
-; GCN-NEXT:    v_readlane_b32 s37, v0, 33
-; GCN-NEXT:    v_readlane_b32 s38, v0, 34
-; GCN-NEXT:    v_readlane_b32 s39, v0, 35
-; GCN-NEXT:    v_readlane_b32 s40, v0, 36
-; GCN-NEXT:    v_readlane_b32 s41, v0, 37
-; GCN-NEXT:    v_readlane_b32 s42, v0, 38
-; GCN-NEXT:    v_readlane_b32 s43, v0, 39
-; GCN-NEXT:    v_readlane_b32 s44, v0, 40
-; GCN-NEXT:    v_readlane_b32 s45, v0, 41
-; GCN-NEXT:    v_readlane_b32 s46, v0, 42
-; GCN-NEXT:    v_readlane_b32 s47, v0, 43
-; GCN-NEXT:    v_readlane_b32 s48, v0, 44
-; GCN-NEXT:    v_readlane_b32 s49, v0, 45
-; GCN-NEXT:    v_readlane_b32 s50, v0, 46
-; GCN-NEXT:    v_readlane_b32 s51, v0, 47
-; GCN-NEXT:    v_readlane_b32 s0, v0, 0
-; GCN-NEXT:    v_readlane_b32 s1, v0, 1
-; GCN-NEXT:    v_readlane_b32 s2, v0, 2
-; GCN-NEXT:    v_readlane_b32 s3, v0, 3
-; GCN-NEXT:    v_readlane_b32 s4, v0, 4
-; GCN-NEXT:    v_readlane_b32 s5, v0, 5
-; GCN-NEXT:    v_readlane_b32 s6, v0, 6
-; GCN-NEXT:    v_readlane_b32 s7, v0, 7
-; GCN-NEXT:    v_readlane_b32 s8, v0, 8
-; GCN-NEXT:    v_readlane_b32 s9, v0, 9
-; GCN-NEXT:    v_readlane_b32 s10, v0, 10
-; GCN-NEXT:    v_readlane_b32 s11, v0, 11
-; GCN-NEXT:    v_readlane_b32 s12, v0, 12
-; GCN-NEXT:    v_readlane_b32 s13, v0, 13
-; GCN-NEXT:    v_readlane_b32 s14, v0, 14
-; GCN-NEXT:    v_readlane_b32 s15, v0, 15
+; GCN-NEXT:    v_readlane_b32 s36, v1, 32
+; GCN-NEXT:    v_readlane_b32 s37, v1, 33
+; GCN-NEXT:    v_readlane_b32 s38, v1, 34
+; GCN-NEXT:    v_readlane_b32 s39, v1, 35
+; GCN-NEXT:    v_readlane_b32 s40, v1, 36
+; GCN-NEXT:    v_readlane_b32 s41, v1, 37
+; GCN-NEXT:    v_readlane_b32 s42, v1, 38
+; GCN-NEXT:    v_readlane_b32 s43, v1, 39
+; GCN-NEXT:    v_readlane_b32 s44, v1, 40
+; GCN-NEXT:    v_readlane_b32 s45, v1, 41
+; GCN-NEXT:    v_readlane_b32 s46, v1, 42
+; GCN-NEXT:    v_readlane_b32 s47, v1, 43
+; GCN-NEXT:    v_readlane_b32 s48, v1, 44
+; GCN-NEXT:    v_readlane_b32 s49, v1, 45
+; GCN-NEXT:    v_readlane_b32 s50, v1, 46
+; GCN-NEXT:    v_readlane_b32 s51, v1, 47
+; GCN-NEXT:    v_readlane_b32 s0, v1, 0
+; GCN-NEXT:    v_readlane_b32 s1, v1, 1
+; GCN-NEXT:    v_readlane_b32 s2, v1, 2
+; GCN-NEXT:    v_readlane_b32 s3, v1, 3
+; GCN-NEXT:    v_readlane_b32 s4, v1, 4
+; GCN-NEXT:    v_readlane_b32 s5, v1, 5
+; GCN-NEXT:    v_readlane_b32 s6, v1, 6
+; GCN-NEXT:    v_readlane_b32 s7, v1, 7
+; GCN-NEXT:    v_readlane_b32 s8, v1, 8
+; GCN-NEXT:    v_readlane_b32 s9, v1, 9
+; GCN-NEXT:    v_readlane_b32 s10, v1, 10
+; GCN-NEXT:    v_readlane_b32 s11, v1, 11
+; GCN-NEXT:    v_readlane_b32 s12, v1, 12
+; GCN-NEXT:    v_readlane_b32 s13, v1, 13
+; GCN-NEXT:    v_readlane_b32 s14, v1, 14
+; GCN-NEXT:    v_readlane_b32 s15, v1, 15
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:15]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 16
-; GCN-NEXT:    v_readlane_b32 s1, v0, 17
-; GCN-NEXT:    v_readlane_b32 s2, v0, 18
-; GCN-NEXT:    v_readlane_b32 s3, v0, 19
-; GCN-NEXT:    v_readlane_b32 s4, v0, 20
-; GCN-NEXT:    v_readlane_b32 s5, v0, 21
-; GCN-NEXT:    v_readlane_b32 s6, v0, 22
-; GCN-NEXT:    v_readlane_b32 s7, v0, 23
-; GCN-NEXT:    v_readlane_b32 s8, v0, 24
-; GCN-NEXT:    v_readlane_b32 s9, v0, 25
-; GCN-NEXT:    v_readlane_b32 s10, v0, 26
-; GCN-NEXT:    v_readlane_b32 s11, v0, 27
-; GCN-NEXT:    v_readlane_b32 s12, v0, 28
-; GCN-NEXT:    v_readlane_b32 s13, v0, 29
-; GCN-NEXT:    v_readlane_b32 s14, v0, 30
-; GCN-NEXT:    v_readlane_b32 s15, v0, 31
+; GCN-NEXT:    v_readlane_b32 s0, v1, 16
+; GCN-NEXT:    v_readlane_b32 s1, v1, 17
+; GCN-NEXT:    v_readlane_b32 s2, v1, 18
+; GCN-NEXT:    v_readlane_b32 s3, v1, 19
+; GCN-NEXT:    v_readlane_b32 s4, v1, 20
+; GCN-NEXT:    v_readlane_b32 s5, v1, 21
+; GCN-NEXT:    v_readlane_b32 s6, v1, 22
+; GCN-NEXT:    v_readlane_b32 s7, v1, 23
+; GCN-NEXT:    v_readlane_b32 s8, v1, 24
+; GCN-NEXT:    v_readlane_b32 s9, v1, 25
+; GCN-NEXT:    v_readlane_b32 s10, v1, 26
+; GCN-NEXT:    v_readlane_b32 s11, v1, 27
+; GCN-NEXT:    v_readlane_b32 s12, v1, 28
+; GCN-NEXT:    v_readlane_b32 s13, v1, 29
+; GCN-NEXT:    v_readlane_b32 s14, v1, 30
+; GCN-NEXT:    v_readlane_b32 s15, v1, 31
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:15]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 48
-; GCN-NEXT:    v_readlane_b32 s1, v0, 49
-; GCN-NEXT:    v_readlane_b32 s2, v0, 50
-; GCN-NEXT:    v_readlane_b32 s3, v0, 51
-; GCN-NEXT:    v_readlane_b32 s4, v0, 52
-; GCN-NEXT:    v_readlane_b32 s5, v0, 53
-; GCN-NEXT:    v_readlane_b32 s6, v0, 54
-; GCN-NEXT:    v_readlane_b32 s7, v0, 55
-; GCN-NEXT:    v_readlane_b32 s8, v0, 56
-; GCN-NEXT:    v_readlane_b32 s9, v0, 57
-; GCN-NEXT:    v_readlane_b32 s10, v0, 58
-; GCN-NEXT:    v_readlane_b32 s11, v0, 59
-; GCN-NEXT:    v_readlane_b32 s12, v0, 60
-; GCN-NEXT:    v_readlane_b32 s13, v0, 61
-; GCN-NEXT:    v_readlane_b32 s14, v0, 62
-; GCN-NEXT:    v_readlane_b32 s15, v0, 63
+; GCN-NEXT:    v_readlane_b32 s0, v1, 48
+; GCN-NEXT:    v_readlane_b32 s1, v1, 49
+; GCN-NEXT:    v_readlane_b32 s2, v1, 50
+; GCN-NEXT:    v_readlane_b32 s3, v1, 51
+; GCN-NEXT:    v_readlane_b32 s4, v1, 52
+; GCN-NEXT:    v_readlane_b32 s5, v1, 53
+; GCN-NEXT:    v_readlane_b32 s6, v1, 54
+; GCN-NEXT:    v_readlane_b32 s7, v1, 55
+; GCN-NEXT:    v_readlane_b32 s8, v1, 56
+; GCN-NEXT:    v_readlane_b32 s9, v1, 57
+; GCN-NEXT:    v_readlane_b32 s10, v1, 58
+; GCN-NEXT:    v_readlane_b32 s11, v1, 59
+; GCN-NEXT:    v_readlane_b32 s12, v1, 60
+; GCN-NEXT:    v_readlane_b32 s13, v1, 61
+; GCN-NEXT:    v_readlane_b32 s14, v1, 62
+; GCN-NEXT:    v_readlane_b32 s15, v1, 63
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[36:51]
 ; GCN-NEXT:    ;;#ASMEND
@@ -699,14 +685,6 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    ; use s[0:15]
 ; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:  .LBB1_2: ; %ret
-; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[28:29]
-; GCN-NEXT:    s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[28:29]
-; GCN-NEXT:    ; kill: killed $vgpr1
-; GCN-NEXT:    ; kill: killed $vgpr0
 ; GCN-NEXT:    s_endpgm
   %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
   %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
@@ -743,17 +721,9 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    s_mov_b32 s55, 0xe8f000
 ; GCN-NEXT:    s_add_u32 s52, s52, s9
 ; GCN-NEXT:    s_addc_u32 s53, s53, 0
-; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT:    s_load_dword s0, s[2:3], 0xb
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:    ;;#ASMSTART
@@ -767,91 +737,91 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_waitcnt vmcnt(1)
-; GCN-NEXT:    v_writelane_b32 v1, s4, 0
-; GCN-NEXT:    v_writelane_b32 v1, s5, 1
-; GCN-NEXT:    v_writelane_b32 v1, s6, 2
-; GCN-NEXT:    v_writelane_b32 v1, s7, 3
-; GCN-NEXT:    v_writelane_b32 v1, s8, 4
-; GCN-NEXT:    v_writelane_b32 v1, s9, 5
-; GCN-NEXT:    v_writelane_b32 v1, s10, 6
-; GCN-NEXT:    v_writelane_b32 v1, s11, 7
-; GCN-NEXT:    v_writelane_b32 v1, s12, 8
-; GCN-NEXT:    v_writelane_b32 v1, s13, 9
-; GCN-NEXT:    v_writelane_b32 v1, s14, 10
-; GCN-NEXT:    v_writelane_b32 v1, s15, 11
-; GCN-NEXT:    v_writelane_b32 v1, s16, 12
-; GCN-NEXT:    v_writelane_b32 v1, s17, 13
-; GCN-NEXT:    v_writelane_b32 v1, s18, 14
-; GCN-NEXT:    v_writelane_b32 v1, s19, 15
+; GCN-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; GCN-NEXT:    v_writelane_b32 v32, s4, 0
+; GCN-NEXT:    v_writelane_b32 v32, s5, 1
+; GCN-NEXT:    v_writelane_b32 v32, s6, 2
+; GCN-NEXT:    v_writelane_b32 v32, s7, 3
+; GCN-NEXT:    v_writelane_b32 v32, s8, 4
+; GCN-NEXT:    v_writelane_b32 v32, s9, 5
+; GCN-NEXT:    v_writelane_b32 v32, s10, 6
+; GCN-NEXT:    v_writelane_b32 v32, s11, 7
+; GCN-NEXT:    v_writelane_b32 v32, s12, 8
+; GCN-NEXT:    v_writelane_b32 v32, s13, 9
+; GCN-NEXT:    v_writelane_b32 v32, s14, 10
+; GCN-NEXT:    v_writelane_b32 v32, s15, 11
+; GCN-NEXT:    v_writelane_b32 v32, s16, 12
+; GCN-NEXT:    v_writelane_b32 v32, s17, 13
+; GCN-NEXT:    v_writelane_b32 v32, s18, 14
+; GCN-NEXT:    v_writelane_b32 v32, s19, 15
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 16
-; GCN-NEXT:    v_writelane_b32 v1, s5, 17
-; GCN-NEXT:    v_writelane_b32 v1, s6, 18
-; GCN-NEXT:    v_writelane_b32 v1, s7, 19
-; GCN-NEXT:    v_writelane_b32 v1, s8, 20
-; GCN-NEXT:    v_writelane_b32 v1, s9, 21
-; GCN-NEXT:    v_writelane_b32 v1, s10, 22
-; GCN-NEXT:    v_writelane_b32 v1, s11, 23
-; GCN-NEXT:    v_writelane_b32 v1, s12, 24
-; GCN-NEXT:    v_writelane_b32 v1, s13, 25
-; GCN-NEXT:    v_writelane_b32 v1, s14, 26
-; GCN-NEXT:    v_writelane_b32 v1, s15, 27
-; GCN-NEXT:    v_writelane_b32 v1, s16, 28
-; GCN-NEXT:    v_writelane_b32 v1, s17, 29
-; GCN-NEXT:    v_writelane_b32 v1, s18, 30
-; GCN-NEXT:    v_writelane_b32 v1, s19, 31
+; GCN-NEXT:    v_writelane_b32 v32, s4, 16
+; GCN-NEXT:    v_writelane_b32 v32, s5, 17
+; GCN-NEXT:    v_writelane_b32 v32, s6, 18
+; GCN-NEXT:    v_writelane_b32 v32, s7, 19
+; GCN-NEXT:    v_writelane_b32 v32, s8, 20
+; GCN-NEXT:    v_writelane_b32 v32, s9, 21
+; GCN-NEXT:    v_writelane_b32 v32, s10, 22
+; GCN-NEXT:    v_writelane_b32 v32, s11, 23
+; GCN-NEXT:    v_writelane_b32 v32, s12, 24
+; GCN-NEXT:    v_writelane_b32 v32, s13, 25
+; GCN-NEXT:    v_writelane_b32 v32, s14, 26
+; GCN-NEXT:    v_writelane_b32 v32, s15, 27
+; GCN-NEXT:    v_writelane_b32 v32, s16, 28
+; GCN-NEXT:    v_writelane_b32 v32, s17, 29
+; GCN-NEXT:    v_writelane_b32 v32, s18, 30
+; GCN-NEXT:    v_writelane_b32 v32, s19, 31
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 32
-; GCN-NEXT:    v_writelane_b32 v1, s5, 33
-; GCN-NEXT:    v_writelane_b32 v1, s6, 34
-; GCN-NEXT:    v_writelane_b32 v1, s7, 35
-; GCN-NEXT:    v_writelane_b32 v1, s8, 36
-; GCN-NEXT:    v_writelane_b32 v1, s9, 37
-; GCN-NEXT:    v_writelane_b32 v1, s10, 38
-; GCN-NEXT:    v_writelane_b32 v1, s11, 39
-; GCN-NEXT:    v_writelane_b32 v1, s12, 40
-; GCN-NEXT:    v_writelane_b32 v1, s13, 41
-; GCN-NEXT:    v_writelane_b32 v1, s14, 42
-; GCN-NEXT:    v_writelane_b32 v1, s15, 43
-; GCN-NEXT:    v_writelane_b32 v1, s16, 44
-; GCN-NEXT:    v_writelane_b32 v1, s17, 45
-; GCN-NEXT:    v_writelane_b32 v1, s18, 46
-; GCN-NEXT:    v_writelane_b32 v1, s19, 47
+; GCN-NEXT:    v_writelane_b32 v32, s4, 32
+; GCN-NEXT:    v_writelane_b32 v32, s5, 33
+; GCN-NEXT:    v_writelane_b32 v32, s6, 34
+; GCN-NEXT:    v_writelane_b32 v32, s7, 35
+; GCN-NEXT:    v_writelane_b32 v32, s8, 36
+; GCN-NEXT:    v_writelane_b32 v32, s9, 37
+; GCN-NEXT:    v_writelane_b32 v32, s10, 38
+; GCN-NEXT:    v_writelane_b32 v32, s11, 39
+; GCN-NEXT:    v_writelane_b32 v32, s12, 40
+; GCN-NEXT:    v_writelane_b32 v32, s13, 41
+; GCN-NEXT:    v_writelane_b32 v32, s14, 42
+; GCN-NEXT:    v_writelane_b32 v32, s15, 43
+; GCN-NEXT:    v_writelane_b32 v32, s16, 44
+; GCN-NEXT:    v_writelane_b32 v32, s17, 45
+; GCN-NEXT:    v_writelane_b32 v32, s18, 46
+; GCN-NEXT:    v_writelane_b32 v32, s19, 47
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 48
-; GCN-NEXT:    v_writelane_b32 v1, s5, 49
-; GCN-NEXT:    v_writelane_b32 v1, s6, 50
-; GCN-NEXT:    v_writelane_b32 v1, s7, 51
-; GCN-NEXT:    v_writelane_b32 v1, s8, 52
-; GCN-NEXT:    v_writelane_b32 v1, s9, 53
-; GCN-NEXT:    v_writelane_b32 v1, s10, 54
-; GCN-NEXT:    v_writelane_b32 v1, s11, 55
-; GCN-NEXT:    v_writelane_b32 v1, s12, 56
-; GCN-NEXT:    v_writelane_b32 v1, s13, 57
-; GCN-NEXT:    v_writelane_b32 v1, s14, 58
-; GCN-NEXT:    v_writelane_b32 v1, s15, 59
-; GCN-NEXT:    v_writelane_b32 v1, s16, 60
-; GCN-NEXT:    v_writelane_b32 v1, s17, 61
-; GCN-NEXT:    v_writelane_b32 v1, s18, 62
-; GCN-NEXT:    v_writelane_b32 v1, s19, 63
+; GCN-NEXT:    v_writelane_b32 v32, s4, 48
+; GCN-NEXT:    v_writelane_b32 v32, s5, 49
+; GCN-NEXT:    v_writelane_b32 v32, s6, 50
+; GCN-NEXT:    v_writelane_b32 v32, s7, 51
+; GCN-NEXT:    v_writelane_b32 v32, s8, 52
+; GCN-NEXT:    v_writelane_b32 v32, s9, 53
+; GCN-NEXT:    v_writelane_b32 v32, s10, 54
+; GCN-NEXT:    v_writelane_b32 v32, s11, 55
+; GCN-NEXT:    v_writelane_b32 v32, s12, 56
+; GCN-NEXT:    v_writelane_b32 v32, s13, 57
+; GCN-NEXT:    v_writelane_b32 v32, s14, 58
+; GCN-NEXT:    v_writelane_b32 v32, s15, 59
+; GCN-NEXT:    v_writelane_b32 v32, s16, 60
+; GCN-NEXT:    v_writelane_b32 v32, s17, 61
+; GCN-NEXT:    v_writelane_b32 v32, s18, 62
+; GCN-NEXT:    v_writelane_b32 v32, s19, 63
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_store_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v32, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[2:3]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_waitcnt vmcnt(1)
-; GCN-NEXT:    v_writelane_b32 v0, s2, 0
-; GCN-NEXT:    v_writelane_b32 v0, s3, 1
+; GCN-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; GCN-NEXT:    v_writelane_b32 v32, s2, 0
+; GCN-NEXT:    v_writelane_b32 v32, s3, 1
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v32, off, s[52:55], 0 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    s_mov_b32 s1, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
@@ -859,59 +829,59 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    s_cbranch_scc1 .LBB2_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v31, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_readlane_b32 s36, v1, 32
-; GCN-NEXT:    v_readlane_b32 s37, v1, 33
-; GCN-NEXT:    v_readlane_b32 s38, v1, 34
-; GCN-NEXT:    v_readlane_b32 s39, v1, 35
-; GCN-NEXT:    v_readlane_b32 s40, v1, 36
-; GCN-NEXT:    v_readlane_b32 s41, v1, 37
-; GCN-NEXT:    v_readlane_b32 s42, v1, 38
-; GCN-NEXT:    v_readlane_b32 s43, v1, 39
-; GCN-NEXT:    v_readlane_b32 s44, v1, 40
-; GCN-NEXT:    v_readlane_b32 s45, v1, 41
-; GCN-NEXT:    v_readlane_b32 s46, v1, 42
-; GCN-NEXT:    v_readlane_b32 s47, v1, 43
-; GCN-NEXT:    v_readlane_b32 s48, v1, 44
-; GCN-NEXT:    v_readlane_b32 s49, v1, 45
-; GCN-NEXT:    v_readlane_b32 s50, v1, 46
-; GCN-NEXT:    v_readlane_b32 s51, v1, 47
-; GCN-NEXT:    v_readlane_b32 s0, v1, 16
-; GCN-NEXT:    v_readlane_b32 s1, v1, 17
-; GCN-NEXT:    v_readlane_b32 s2, v1, 18
-; GCN-NEXT:    v_readlane_b32 s3, v1, 19
-; GCN-NEXT:    v_readlane_b32 s4, v1, 20
-; GCN-NEXT:    v_readlane_b32 s5, v1, 21
-; GCN-NEXT:    v_readlane_b32 s6, v1, 22
-; GCN-NEXT:    v_readlane_b32 s7, v1, 23
-; GCN-NEXT:    v_readlane_b32 s8, v1, 24
-; GCN-NEXT:    v_readlane_b32 s9, v1, 25
-; GCN-NEXT:    v_readlane_b32 s10, v1, 26
-; GCN-NEXT:    v_readlane_b32 s11, v1, 27
-; GCN-NEXT:    v_readlane_b32 s12, v1, 28
-; GCN-NEXT:    v_readlane_b32 s13, v1, 29
-; GCN-NEXT:    v_readlane_b32 s14, v1, 30
-; GCN-NEXT:    v_readlane_b32 s15, v1, 31
-; GCN-NEXT:    v_readlane_b32 s16, v1, 0
-; GCN-NEXT:    v_readlane_b32 s17, v1, 1
-; GCN-NEXT:    v_readlane_b32 s18, v1, 2
-; GCN-NEXT:    v_readlane_b32 s19, v1, 3
-; GCN-NEXT:    v_readlane_b32 s20, v1, 4
-; GCN-NEXT:    v_readlane_b32 s21, v1, 5
-; GCN-NEXT:    v_readlane_b32 s22, v1, 6
-; GCN-NEXT:    v_readlane_b32 s23, v1, 7
-; GCN-NEXT:    v_readlane_b32 s24, v1, 8
-; GCN-NEXT:    v_readlane_b32 s25, v1, 9
-; GCN-NEXT:    v_readlane_b32 s26, v1, 10
-; GCN-NEXT:    v_readlane_b32 s27, v1, 11
-; GCN-NEXT:    v_readlane_b32 s28, v1, 12
-; GCN-NEXT:    v_readlane_b32 s29, v1, 13
-; GCN-NEXT:    v_readlane_b32 s30, v1, 14
-; GCN-NEXT:    v_readlane_b32 s31, v1, 15
+; GCN-NEXT:    v_readlane_b32 s36, v31, 32
+; GCN-NEXT:    v_readlane_b32 s37, v31, 33
+; GCN-NEXT:    v_readlane_b32 s38, v31, 34
+; GCN-NEXT:    v_readlane_b32 s39, v31, 35
+; GCN-NEXT:    v_readlane_b32 s40, v31, 36
+; GCN-NEXT:    v_readlane_b32 s41, v31, 37
+; GCN-NEXT:    v_readlane_b32 s42, v31, 38
+; GCN-NEXT:    v_readlane_b32 s43, v31, 39
+; GCN-NEXT:    v_readlane_b32 s44, v31, 40
+; GCN-NEXT:    v_readlane_b32 s45, v31, 41
+; GCN-NEXT:    v_readlane_b32 s46, v31, 42
+; GCN-NEXT:    v_readlane_b32 s47, v31, 43
+; GCN-NEXT:    v_readlane_b32 s48, v31, 44
+; GCN-NEXT:    v_readlane_b32 s49, v31, 45
+; GCN-NEXT:    v_readlane_b32 s50, v31, 46
+; GCN-NEXT:    v_readlane_b32 s51, v31, 47
+; GCN-NEXT:    v_readlane_b32 s0, v31, 16
+; GCN-NEXT:    v_readlane_b32 s1, v31, 17
+; GCN-NEXT:    v_readlane_b32 s2, v31, 18
+; GCN-NEXT:    v_readlane_b32 s3, v31, 19
+; GCN-NEXT:    v_readlane_b32 s4, v31, 20
+; GCN-NEXT:    v_readlane_b32 s5, v31, 21
+; GCN-NEXT:    v_readlane_b32 s6, v31, 22
+; GCN-NEXT:    v_readlane_b32 s7, v31, 23
+; GCN-NEXT:    v_readlane_b32 s8, v31, 24
+; GCN-NEXT:    v_readlane_b32 s9, v31, 25
+; GCN-NEXT:    v_readlane_b32 s10, v31, 26
+; GCN-NEXT:    v_readlane_b32 s11, v31, 27
+; GCN-NEXT:    v_readlane_b32 s12, v31, 28
+; GCN-NEXT:    v_readlane_b32 s13, v31, 29
+; GCN-NEXT:    v_readlane_b32 s14, v31, 30
+; GCN-NEXT:    v_readlane_b32 s15, v31, 31
+; GCN-NEXT:    v_readlane_b32 s16, v31, 0
+; GCN-NEXT:    v_readlane_b32 s17, v31, 1
+; GCN-NEXT:    v_readlane_b32 s18, v31, 2
+; GCN-NEXT:    v_readlane_b32 s19, v31, 3
+; GCN-NEXT:    v_readlane_b32 s20, v31, 4
+; GCN-NEXT:    v_readlane_b32 s21, v31, 5
+; GCN-NEXT:    v_readlane_b32 s22, v31, 6
+; GCN-NEXT:    v_readlane_b32 s23, v31, 7
+; GCN-NEXT:    v_readlane_b32 s24, v31, 8
+; GCN-NEXT:    v_readlane_b32 s25, v31, 9
+; GCN-NEXT:    v_readlane_b32 s26, v31, 10
+; GCN-NEXT:    v_readlane_b32 s27, v31, 11
+; GCN-NEXT:    v_readlane_b32 s28, v31, 12
+; GCN-NEXT:    v_readlane_b32 s29, v31, 13
+; GCN-NEXT:    v_readlane_b32 s30, v31, 14
+; GCN-NEXT:    v_readlane_b32 s31, v31, 15
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v32, off, s[52:55], 0 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[16:31]
@@ -919,25 +889,25 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:15]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s4, v1, 48
-; GCN-NEXT:    v_readlane_b32 s5, v1, 49
-; GCN-NEXT:    v_readlane_b32 s6, v1, 50
-; GCN-NEXT:    v_readlane_b32 s7, v1, 51
-; GCN-NEXT:    v_readlane_b32 s8, v1, 52
-; GCN-NEXT:    v_readlane_b32 s9, v1, 53
-; GCN-NEXT:    v_readlane_b32 s10, v1, 54
-; GCN-NEXT:    v_readlane_b32 s11, v1, 55
-; GCN-NEXT:    v_readlane_b32 s12, v1, 56
-; GCN-NEXT:    v_readlane_b32 s13, v1, 57
-; GCN-NEXT:    v_readlane_b32 s14, v1, 58
-; GCN-NEXT:    v_readlane_b32 s15, v1, 59
-; GCN-NEXT:    v_readlane_b32 s16, v1, 60
-; GCN-NEXT:    v_readlane_b32 s17, v1, 61
-; GCN-NEXT:    v_readlane_b32 s18, v1, 62
-; GCN-NEXT:    v_readlane_b32 s19, v1, 63
+; GCN-NEXT:    v_readlane_b32 s4, v31, 48
+; GCN-NEXT:    v_readlane_b32 s5, v31, 49
+; GCN-NEXT:    v_readlane_b32 s6, v31, 50
+; GCN-NEXT:    v_readlane_b32 s7, v31, 51
+; GCN-NEXT:    v_readlane_b32 s8, v31, 52
+; GCN-NEXT:    v_readlane_b32 s9, v31, 53
+; GCN-NEXT:    v_readlane_b32 s10, v31, 54
+; GCN-NEXT:    v_readlane_b32 s11, v31, 55
+; GCN-NEXT:    v_readlane_b32 s12, v31, 56
+; GCN-NEXT:    v_readlane_b32 s13, v31, 57
+; GCN-NEXT:    v_readlane_b32 s14, v31, 58
+; GCN-NEXT:    v_readlane_b32 s15, v31, 59
+; GCN-NEXT:    v_readlane_b32 s16, v31, 60
+; GCN-NEXT:    v_readlane_b32 s17, v31, 61
+; GCN-NEXT:    v_readlane_b32 s18, v31, 62
+; GCN-NEXT:    v_readlane_b32 s19, v31, 63
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_readlane_b32 s0, v0, 0
-; GCN-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-NEXT:    v_readlane_b32 s0, v32, 0
+; GCN-NEXT:    v_readlane_b32 s1, v32, 1
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[36:51]
 ; GCN-NEXT:    ;;#ASMEND
@@ -948,14 +918,6 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT:    ; use s[0:1]
 ; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:  .LBB2_2: ; %ret
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    ; kill: killed $vgpr1
-; GCN-NEXT:    ; kill: killed $vgpr0
 ; GCN-NEXT:    s_endpgm
   call void asm sideeffect "", "~{v[0:7]}" () #0
   call void asm sideeffect "", "~{v[8:15]}" () #0
@@ -995,17 +957,9 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
 ; GCN-NEXT:    s_mov_b32 s55, 0xe8f000
 ; GCN-NEXT:    s_add_u32 s52, s52, s9
 ; GCN-NEXT:    s_addc_u32 s53, s53, 0
-; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT:    s_load_dword s0, s[2:3], 0x9
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:    ;;#ASMSTART
@@ -1019,91 +973,91 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_waitcnt vmcnt(1)
-; GCN-NEXT:    v_writelane_b32 v1, s4, 0
-; GCN-NEXT:    v_writelane_b32 v1, s5, 1
-; GCN-NEXT:    v_writelane_b32 v1, s6, 2
-; GCN-NEXT:    v_writelane_b32 v1, s7, 3
-; GCN-NEXT:    v_writelane_b32 v1, s8, 4
-; GCN-NEXT:    v_writelane_b32 v1, s9, 5
-; GCN-NEXT:    v_writelane_b32 v1, s10, 6
-; GCN-NEXT:    v_writelane_b32 v1, s11, 7
-; GCN-NEXT:    v_writelane_b32 v1, s12, 8
-; GCN-NEXT:    v_writelane_b32 v1, s13, 9
-; GCN-NEXT:    v_writelane_b32 v1, s14, 10
-; GCN-NEXT:    v_writelane_b32 v1, s15, 11
-; GCN-NEXT:    v_writelane_b32 v1, s16, 12
-; GCN-NEXT:    v_writelane_b32 v1, s17, 13
-; GCN-NEXT:    v_writelane_b32 v1, s18, 14
-; GCN-NEXT:    v_writelane_b32 v1, s19, 15
+; GCN-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; GCN-NEXT:    v_writelane_b32 v32, s4, 0
+; GCN-NEXT:    v_writelane_b32 v32, s5, 1
+; GCN-NEXT:    v_writelane_b32 v32, s6, 2
+; GCN-NEXT:    v_writelane_b32 v32, s7, 3
+; GCN-NEXT:    v_writelane_b32 v32, s8, 4
+; GCN-NEXT:    v_writelane_b32 v32, s9, 5
+; GCN-NEXT:    v_writelane_b32 v32, s10, 6
+; GCN-NEXT:    v_writelane_b32 v32, s11, 7
+; GCN-NEXT:    v_writelane_b32 v32, s12, 8
+; GCN-NEXT:    v_writelane_b32 v32, s13, 9
+; GCN-NEXT:    v_writelane_b32 v32, s14, 10
+; GCN-NEXT:    v_writelane_b32 v32, s15, 11
+; GCN-NEXT:    v_writelane_b32 v32, s16, 12
+; GCN-NEXT:    v_writelane_b32 v32, s17, 13
+; GCN-NEXT:    v_writelane_b32 v32, s18, 14
+; GCN-NEXT:    v_writelane_b32 v32, s19, 15
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 16
-; GCN-NEXT:    v_writelane_b32 v1, s5, 17
-; GCN-NEXT:    v_writelane_b32 v1, s6, 18
-; GCN-NEXT:    v_writelane_b32 v1, s7, 19
-; GCN-NEXT:    v_writelane_b32 v1, s8, 20
-; GCN-NEXT:    v_writelane_b32 v1, s9, 21
-; GCN-NEXT:    v_writelane_b32 v1, s10, 22
-; GCN-NEXT:    v_writelane_b32 v1, s11, 23
-; GCN-NEXT:    v_writelane_b32 v1, s12, 24
-; GCN-NEXT:    v_writelane_b32 v1, s13, 25
-; GCN-NEXT:    v_writelane_b32 v1, s14, 26
-; GCN-NEXT:    v_writelane_b32 v1, s15, 27
-; GCN-NEXT:    v_writelane_b32 v1, s16, 28
-; GCN-NEXT:    v_writelane_b32 v1, s17, 29
-; GCN-NEXT:    v_writelane_b32 v1, s18, 30
-; GCN-NEXT:    v_writelane_b32 v1, s19, 31
+; GCN-NEXT:    v_writelane_b32 v32, s4, 16
+; GCN-NEXT:    v_writelane_b32 v32, s5, 17
+; GCN-NEXT:    v_writelane_b32 v32, s6, 18
+; GCN-NEXT:    v_writelane_b32 v32, s7, 19
+; GCN-NEXT:    v_writelane_b32 v32, s8, 20
+; GCN-NEXT:    v_writelane_b32 v32, s9, 21
+; GCN-NEXT:    v_writelane_b32 v32, s10, 22
+; GCN-NEXT:    v_writelane_b32 v32, s11, 23
+; GCN-NEXT:    v_writelane_b32 v32, s12, 24
+; GCN-NEXT:    v_writelane_b32 v32, s13, 25
+; GCN-NEXT:    v_writelane_b32 v32, s14, 26
+; GCN-NEXT:    v_writelane_b32 v32, s15, 27
+; GCN-NEXT:    v_writelane_b32 v32, s16, 28
+; GCN-NEXT:    v_writelane_b32 v32, s17, 29
+; GCN-NEXT:    v_writelane_b32 v32, s18, 30
+; GCN-NEXT:    v_writelane_b32 v32, s19, 31
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 32
-; GCN-NEXT:    v_writelane_b32 v1, s5, 33
-; GCN-NEXT:    v_writelane_b32 v1, s6, 34
-; GCN-NEXT:    v_writelane_b32 v1, s7, 35
-; GCN-NEXT:    v_writelane_b32 v1, s8, 36
-; GCN-NEXT:    v_writelane_b32 v1, s9, 37
-; GCN-NEXT:    v_writelane_b32 v1, s10, 38
-; GCN-NEXT:    v_writelane_b32 v1, s11, 39
-; GCN-NEXT:    v_writelane_b32 v1, s12, 40
-; GCN-NEXT:    v_writelane_b32 v1, s13, 41
-; GCN-NEXT:    v_writelane_b32 v1, s14, 42
-; GCN-NEXT:    v_writelane_b32 v1, s15, 43
-; GCN-NEXT:    v_writelane_b32 v1, s16, 44
-; GCN-NEXT:    v_writelane_b32 v1, s17, 45
-; GCN-NEXT:    v_writelane_b32 v1, s18, 46
-; GCN-NEXT:    v_writelane_b32 v1, s19, 47
+; GCN-NEXT:    v_writelane_b32 v32, s4, 32
+; GCN-NEXT:    v_writelane_b32 v32, s5, 33
+; GCN-NEXT:    v_writelane_b32 v32, s6, 34
+; GCN-NEXT:    v_writelane_b32 v32, s7, 35
+; GCN-NEXT:    v_writelane_b32 v32, s8, 36
+; GCN-NEXT:    v_writelane_b32 v32, s9, 37
+; GCN-NEXT:    v_writelane_b32 v32, s10, 38
+; GCN-NEXT:    v_writelane_b32 v32, s11, 39
+; GCN-NEXT:    v_writelane_b32 v32, s12, 40
+; GCN-NEXT:    v_writelane_b32 v32, s13, 41
+; GCN-NEXT:    v_writelane_b32 v32, s14, 42
+; GCN-NEXT:    v_writelane_b32 v32, s15, 43
+; GCN-NEXT:    v_writelane_b32 v32, s16, 44
+; GCN-NEXT:    v_writelane_b32 v32, s17, 45
+; GCN-NEXT:    v_writelane_b32 v32, s18, 46
+; GCN-NEXT:    v_writelane_b32 v32, s19, 47
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s4, 48
-; GCN-NEXT:    v_writelane_b32 v1, s5, 49
-; GCN-NEXT:    v_writelane_b32 v1, s6, 50
-; GCN-NEXT:    v_writelane_b32 v1, s7, 51
-; GCN-NEXT:    v_writelane_b32 v1, s8, 52
-; GCN-NEXT:    v_writelane_b32 v1, s9, 53
-; GCN-NEXT:    v_writelane_b32 v1, s10, 54
-; GCN-NEXT:    v_writelane_b32 v1, s11, 55
-; GCN-NEXT:    v_writelane_b32 v1, s12, 56
-; GCN-NEXT:    v_writelane_b32 v1, s13, 57
-; GCN-NEXT:    v_writelane_b32 v1, s14, 58
-; GCN-NEXT:    v_writelane_b32 v1, s15, 59
-; GCN-NEXT:    v_writelane_b32 v1, s16, 60
-; GCN-NEXT:    v_writelane_b32 v1, s17, 61
-; GCN-NEXT:    v_writelane_b32 v1, s18, 62
-; GCN-NEXT:    v_writelane_b32 v1, s19, 63
+; GCN-NEXT:    v_writelane_b32 v32, s4, 48
+; GCN-NEXT:    v_writelane_b32 v32, s5, 49
+; GCN-NEXT:    v_writelane_b32 v32, s6, 50
+; GCN-NEXT:    v_writelane_b32 v32, s7, 51
+; GCN-NEXT:    v_writelane_b32 v32, s8, 52
+; GCN-NEXT:    v_writelane_b32 v32, s9, 53
+; GCN-NEXT:    v_writelane_b32 v32, s10, 54
+; GCN-NEXT:    v_writelane_b32 v32, s11, 55
+; GCN-NEXT:    v_writelane_b32 v32, s12, 56
+; GCN-NEXT:    v_writelane_b32 v32, s13, 57
+; GCN-NEXT:    v_writelane_b32 v32, s14, 58
+; GCN-NEXT:    v_writelane_b32 v32, s15, 59
+; GCN-NEXT:    v_writelane_b32 v32, s16, 60
+; GCN-NEXT:    v_writelane_b32 v32, s17, 61
+; GCN-NEXT:    v_writelane_b32 v32, s18, 62
+; GCN-NEXT:    v_writelane_b32 v32, s19, 63
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_store_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v32, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[2:3]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_waitcnt vmcnt(1)
-; GCN-NEXT:    v_writelane_b32 v0, s2, 0
-; GCN-NEXT:    v_writelane_b32 v0, s3, 1
+; GCN-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; GCN-NEXT:    v_writelane_b32 v32, s2, 0
+; GCN-NEXT:    v_writelane_b32 v32, s3, 1
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v32, off, s[52:55], 0 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    s_mov_b32 s1, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1111,59 +1065,59 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
 ; GCN-NEXT:    s_cbranch_scc1 .LBB3_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v2, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v31, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_readlane_b32 s36, v2, 32
-; GCN-NEXT:    v_readlane_b32 s37, v2, 33
-; GCN-NEXT:    v_readlane_b32 s38, v2, 34
-; GCN-NEXT:    v_readlane_b32 s39, v2, 35
-; GCN-NEXT:    v_readlane_b32 s40, v2, 36
-; GCN-NEXT:    v_readlane_b32 s41, v2, 37
-; GCN-NEXT:    v_readlane_b32 s42, v2, 38
-; GCN-NEXT:    v_readlane_b32 s43, v2, 39
-; GCN-NEXT:    v_readlane_b32 s44, v2, 40
-; GCN-NEXT:    v_readlane_b32 s45, v2, 41
-; GCN-NEXT:    v_readlane_b32 s46, v2, 42
-; GCN-NEXT:    v_readlane_b32 s47, v2, 43
-; GCN-NEXT:    v_readlane_b32 s48, v2, 44
-; GCN-NEXT:    v_readlane_b32 s49, v2, 45
-; GCN-NEXT:    v_readlane_b32 s50, v2, 46
-; GCN-NEXT:    v_readlane_b32 s51, v2, 47
-; GCN-NEXT:    v_readlane_b32 s0, v2, 16
-; GCN-NEXT:    v_readlane_b32 s1, v2, 17
-; GCN-NEXT:    v_readlane_b32 s2, v2, 18
-; GCN-NEXT:    v_readlane_b32 s3, v2, 19
-; GCN-NEXT:    v_readlane_b32 s4, v2, 20
-; GCN-NEXT:    v_readlane_b32 s5, v2, 21
-; GCN-NEXT:    v_readlane_b32 s6, v2, 22
-; GCN-NEXT:    v_readlane_b32 s7, v2, 23
-; GCN-NEXT:    v_readlane_b32 s8, v2, 24
-; GCN-NEXT:    v_readlane_b32 s9, v2, 25
-; GCN-NEXT:    v_readlane_b32 s10, v2, 26
-; GCN-NEXT:    v_readlane_b32 s11, v2, 27
-; GCN-NEXT:    v_readlane_b32 s12, v2, 28
-; GCN-NEXT:    v_readlane_b32 s13, v2, 29
-; GCN-NEXT:    v_readlane_b32 s14, v2, 30
-; GCN-NEXT:    v_readlane_b32 s15, v2, 31
-; GCN-NEXT:    v_readlane_b32 s16, v2, 0
-; GCN-NEXT:    v_readlane_b32 s17, v2, 1
-; GCN-NEXT:    v_readlane_b32 s18, v2, 2
-; GCN-NEXT:    v_readlane_b32 s19, v2, 3
-; GCN-NEXT:    v_readlane_b32 s20, v2, 4
-; GCN-NEXT:    v_readlane_b32 s21, v2, 5
-; GCN-NEXT:    v_readlane_b32 s22, v2, 6
-; GCN-NEXT:    v_readlane_b32 s23, v2, 7
-; GCN-NEXT:    v_readlane_b32 s24, v2, 8
-; GCN-NEXT:    v_readlane_b32 s25, v2, 9
-; GCN-NEXT:    v_readlane_b32 s26, v2, 10
-; GCN-NEXT:    v_readlane_b32 s27, v2, 11
-; GCN-NEXT:    v_readlane_b32 s28, v2, 12
-; GCN-NEXT:    v_readlane_b32 s29, v2, 13
-; GCN-NEXT:    v_readlane_b32 s30, v2, 14
-; GCN-NEXT:    v_readlane_b32 s31, v2, 15
+; GCN-NEXT:    v_readlane_b32 s36, v31, 32
+; GCN-NEXT:    v_readlane_b32 s37, v31, 33
+; GCN-NEXT:    v_readlane_b32 s38, v31, 34
+; GCN-NEXT:    v_readlane_b32 s39, v31, 35
+; GCN-NEXT:    v_readlane_b32 s40, v31, 36
+; GCN-NEXT:    v_readlane_b32 s41, v31, 37
+; GCN-NEXT:    v_readlane_b32 s42, v31, 38
+; GCN-NEXT:    v_readlane_b32 s43, v31, 39
+; GCN-NEXT:    v_readlane_b32 s44, v31, 40
+; GCN-NEXT:    v_readlane_b32 s45, v31, 41
+; GCN-NEXT:    v_readlane_b32 s46, v31, 42
+; GCN-NEXT:    v_readlane_b32 s47, v31, 43
+; GCN-NEXT:    v_readlane_b32 s48, v31, 44
+; GCN-NEXT:    v_readlane_b32 s49, v31, 45
+; GCN-NEXT:    v_readlane_b32 s50, v31, 46
+; GCN-NEXT:    v_readlane_b32 s51, v31, 47
+; GCN-NEXT:    v_readlane_b32 s0, v31, 16
+; GCN-NEXT:    v_readlane_b32 s1, v31, 17
+; GCN-NEXT:    v_readlane_b32 s2, v31, 18
+; GCN-NEXT:    v_readlane_b32 s3, v31, 19
+; GCN-NEXT:    v_readlane_b32 s4, v31, 20
+; GCN-NEXT:    v_readlane_b32 s5, v31, 21
+; GCN-NEXT:    v_readlane_b32 s6, v31, 22
+; GCN-NEXT:    v_readlane_b32 s7, v31, 23
+; GCN-NEXT:    v_readlane_b32 s8, v31, 24
+; GCN-NEXT:    v_readlane_b32 s9, v31, 25
+; GCN-NEXT:    v_readlane_b32 s10, v31, 26
+; GCN-NEXT:    v_readlane_b32 s11, v31, 27
+; GCN-NEXT:    v_readlane_b32 s12, v31, 28
+; GCN-NEXT:    v_readlane_b32 s13, v31, 29
+; GCN-NEXT:    v_readlane_b32 s14, v31, 30
+; GCN-NEXT:    v_readlane_b32 s15, v31, 31
+; GCN-NEXT:    v_readlane_b32 s16, v31, 0
+; GCN-NEXT:    v_readlane_b32 s17, v31, 1
+; GCN-NEXT:    v_readlane_b32 s18, v31, 2
+; GCN-NEXT:    v_readlane_b32 s19, v31, 3
+; GCN-NEXT:    v_readlane_b32 s20, v31, 4
+; GCN-NEXT:    v_readlane_b32 s21, v31, 5
+; GCN-NEXT:    v_readlane_b32 s22, v31, 6
+; GCN-NEXT:    v_readlane_b32 s23, v31, 7
+; GCN-NEXT:    v_readlane_b32 s24, v31, 8
+; GCN-NEXT:    v_readlane_b32 s25, v31, 9
+; GCN-NEXT:    v_readlane_b32 s26, v31, 10
+; GCN-NEXT:    v_readlane_b32 s27, v31, 11
+; GCN-NEXT:    v_readlane_b32 s28, v31, 12
+; GCN-NEXT:    v_readlane_b32 s29, v31, 13
+; GCN-NEXT:    v_readlane_b32 s30, v31, 14
+; GCN-NEXT:    v_readlane_b32 s31, v31, 15
 ; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v32, off, s[52:55], 0 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[34:35]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def v0
@@ -1174,25 +1128,25 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:15]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s4, v2, 48
-; GCN-NEXT:    v_readlane_b32 s5, v2, 49
-; GCN-NEXT:    v_readlane_b32 s6, v2, 50
-; GCN-NEXT:    v_readlane_b32 s7, v2, 51
-; GCN-NEXT:    v_readlane_b32 s8, v2, 52
-; GCN-NEXT:    v_readlane_b32 s9, v2, 53
-; GCN-NEXT:    v_readlane_b32 s10, v2, 54
-; GCN-NEXT:    v_readlane_b32 s11, v2, 55
-; GCN-NEXT:    v_readlane_b32 s12, v2, 56
-; GCN-NEXT:    v_readlane_b32 s13, v2, 57
-; GCN-NEXT:    v_readlane_b32 s14, v2, 58
-; GCN-NEXT:    v_readlane_b32 s15, v2, 59
-; GCN-NEXT:    v_readlane_b32 s16, v2, 60
-; GCN-NEXT:    v_readlane_b32 s17, v2, 61
-; GCN-NEXT:    v_readlane_b32 s18, v2, 62
-; GCN-NEXT:    v_readlane_b32 s19, v2, 63
+; GCN-NEXT:    v_readlane_b32 s4, v31, 48
+; GCN-NEXT:    v_readlane_b32 s5, v31, 49
+; GCN-NEXT:    v_readlane_b32 s6, v31, 50
+; GCN-NEXT:    v_readlane_b32 s7, v31, 51
+; GCN-NEXT:    v_readlane_b32 s8, v31, 52
+; GCN-NEXT:    v_readlane_b32 s9, v31, 53
+; GCN-NEXT:    v_readlane_b32 s10, v31, 54
+; GCN-NEXT:    v_readlane_b32 s11, v31, 55
+; GCN-NEXT:    v_readlane_b32 s12, v31, 56
+; GCN-NEXT:    v_readlane_b32 s13, v31, 57
+; GCN-NEXT:    v_readlane_b32 s14, v31, 58
+; GCN-NEXT:    v_readlane_b32 s15, v31, 59
+; GCN-NEXT:    v_readlane_b32 s16, v31, 60
+; GCN-NEXT:    v_readlane_b32 s17, v31, 61
+; GCN-NEXT:    v_readlane_b32 s18, v31, 62
+; GCN-NEXT:    v_readlane_b32 s19, v31, 63
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_readlane_b32 s0, v1, 0
-; GCN-NEXT:    v_readlane_b32 s1, v1, 1
+; GCN-NEXT:    v_readlane_b32 s0, v32, 0
+; GCN-NEXT:    v_readlane_b32 s1, v32, 1
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[36:51]
 ; GCN-NEXT:    ;;#ASMEND
@@ -1206,14 +1160,6 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
 ; GCN-NEXT:    ; use v0
 ; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:  .LBB3_2: ; %ret
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[34:35]
-; GCN-NEXT:    ; kill: killed $vgpr1
-; GCN-NEXT:    ; kill: killed $vgpr0
 ; GCN-NEXT:    s_endpgm
   call void asm sideeffect "", "~{v[0:7]}" () #0
   call void asm sideeffect "", "~{v[8:15]}" () #0
@@ -1245,7 +1191,7 @@ ret:
 }
 
 attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-waves-per-eu"="8,8" }
+attributes #1 = { nounwind "amdgpu-waves-per-eu"="7,7" }
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
index 477183e63ff16..0fe7666322949 100644
--- a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
 
-; XFAIL: *
-
 ; This was a negative test to catch an extreme case when all options are exhausted
 ; while trying to spill SGPRs to memory. After we enabled SGPR spills into virtual VGPRs
 ; the edge case won't arise and the test will always compile.
@@ -10,9 +8,6 @@
 define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
 ; CHECK-LABEL: kernel0:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; implicit-def: $vgpr23 : SGPR spill to VGPR lane
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ;;#ASMEND
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ;;#ASMEND
 ; CHECK-NEXT:    ;;#ASMSTART
@@ -24,46 +19,47 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[2:3]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s2, 0
+; CHECK-NEXT:    ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
 ; CHECK-NEXT:    s_load_dword s0, s[4:5], 0x8
-; CHECK-NEXT:    v_writelane_b32 v23, s3, 1
+; CHECK-NEXT:    v_writelane_b32 v22, s2, 0
+; CHECK-NEXT:    v_writelane_b32 v22, s3, 1
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[4:7]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s4, 2
-; CHECK-NEXT:    v_writelane_b32 v23, s5, 3
-; CHECK-NEXT:    v_writelane_b32 v23, s6, 4
-; CHECK-NEXT:    v_writelane_b32 v23, s7, 5
+; CHECK-NEXT:    v_writelane_b32 v22, s4, 2
+; CHECK-NEXT:    v_writelane_b32 v22, s5, 3
+; CHECK-NEXT:    v_writelane_b32 v22, s6, 4
+; CHECK-NEXT:    v_writelane_b32 v22, s7, 5
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[4:11]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s4, 6
-; CHECK-NEXT:    v_writelane_b32 v23, s5, 7
-; CHECK-NEXT:    v_writelane_b32 v23, s6, 8
-; CHECK-NEXT:    v_writelane_b32 v23, s7, 9
-; CHECK-NEXT:    v_writelane_b32 v23, s8, 10
-; CHECK-NEXT:    v_writelane_b32 v23, s9, 11
-; CHECK-NEXT:    v_writelane_b32 v23, s10, 12
-; CHECK-NEXT:    v_writelane_b32 v23, s11, 13
+; CHECK-NEXT:    v_writelane_b32 v22, s4, 6
+; CHECK-NEXT:    v_writelane_b32 v22, s5, 7
+; CHECK-NEXT:    v_writelane_b32 v22, s6, 8
+; CHECK-NEXT:    v_writelane_b32 v22, s7, 9
+; CHECK-NEXT:    v_writelane_b32 v22, s8, 10
+; CHECK-NEXT:    v_writelane_b32 v22, s9, 11
+; CHECK-NEXT:    v_writelane_b32 v22, s10, 12
+; CHECK-NEXT:    v_writelane_b32 v22, s11, 13
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[4:19]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s4, 14
-; CHECK-NEXT:    v_writelane_b32 v23, s5, 15
-; CHECK-NEXT:    v_writelane_b32 v23, s6, 16
-; CHECK-NEXT:    v_writelane_b32 v23, s7, 17
-; CHECK-NEXT:    v_writelane_b32 v23, s8, 18
-; CHECK-NEXT:    v_writelane_b32 v23, s9, 19
-; CHECK-NEXT:    v_writelane_b32 v23, s10, 20
-; CHECK-NEXT:    v_writelane_b32 v23, s11, 21
-; CHECK-NEXT:    v_writelane_b32 v23, s12, 22
-; CHECK-NEXT:    v_writelane_b32 v23, s13, 23
-; CHECK-NEXT:    v_writelane_b32 v23, s14, 24
-; CHECK-NEXT:    v_writelane_b32 v23, s15, 25
-; CHECK-NEXT:    v_writelane_b32 v23, s16, 26
-; CHECK-NEXT:    v_writelane_b32 v23, s17, 27
-; CHECK-NEXT:    v_writelane_b32 v23, s18, 28
-; CHECK-NEXT:    v_writelane_b32 v23, s19, 29
+; CHECK-NEXT:    v_writelane_b32 v22, s4, 14
+; CHECK-NEXT:    v_writelane_b32 v22, s5, 15
+; CHECK-NEXT:    v_writelane_b32 v22, s6, 16
+; CHECK-NEXT:    v_writelane_b32 v22, s7, 17
+; CHECK-NEXT:    v_writelane_b32 v22, s8, 18
+; CHECK-NEXT:    v_writelane_b32 v22, s9, 19
+; CHECK-NEXT:    v_writelane_b32 v22, s10, 20
+; CHECK-NEXT:    v_writelane_b32 v22, s11, 21
+; CHECK-NEXT:    v_writelane_b32 v22, s12, 22
+; CHECK-NEXT:    v_writelane_b32 v22, s13, 23
+; CHECK-NEXT:    v_writelane_b32 v22, s14, 24
+; CHECK-NEXT:    v_writelane_b32 v22, s15, 25
+; CHECK-NEXT:    v_writelane_b32 v22, s16, 26
+; CHECK-NEXT:    v_writelane_b32 v22, s17, 27
+; CHECK-NEXT:    v_writelane_b32 v22, s18, 28
+; CHECK-NEXT:    v_writelane_b32 v22, s19, 29
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[42:43]
 ; CHECK-NEXT:    ;;#ASMEND
@@ -73,14 +69,14 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[4:11]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s4, 30
-; CHECK-NEXT:    v_writelane_b32 v23, s5, 31
-; CHECK-NEXT:    v_writelane_b32 v23, s6, 32
-; CHECK-NEXT:    v_writelane_b32 v23, s7, 33
-; CHECK-NEXT:    v_writelane_b32 v23, s8, 34
-; CHECK-NEXT:    v_writelane_b32 v23, s9, 35
-; CHECK-NEXT:    v_writelane_b32 v23, s10, 36
-; CHECK-NEXT:    v_writelane_b32 v23, s11, 37
+; CHECK-NEXT:    v_writelane_b32 v22, s4, 30
+; CHECK-NEXT:    v_writelane_b32 v22, s5, 31
+; CHECK-NEXT:    v_writelane_b32 v22, s6, 32
+; CHECK-NEXT:    v_writelane_b32 v22, s7, 33
+; CHECK-NEXT:    v_writelane_b32 v22, s8, 34
+; CHECK-NEXT:    v_writelane_b32 v22, s9, 35
+; CHECK-NEXT:    v_writelane_b32 v22, s10, 36
+; CHECK-NEXT:    v_writelane_b32 v22, s11, 37
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
 ; CHECK-NEXT:    ;;#ASMSTART
@@ -98,161 +94,159 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[0:15]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s0, 38
-; CHECK-NEXT:    v_writelane_b32 v23, s1, 39
-; CHECK-NEXT:    v_writelane_b32 v23, s2, 40
-; CHECK-NEXT:    v_writelane_b32 v23, s3, 41
-; CHECK-NEXT:    v_writelane_b32 v23, s4, 42
-; CHECK-NEXT:    v_writelane_b32 v23, s5, 43
-; CHECK-NEXT:    v_writelane_b32 v23, s6, 44
-; CHECK-NEXT:    v_writelane_b32 v23, s7, 45
-; CHECK-NEXT:    v_writelane_b32 v23, s8, 46
-; CHECK-NEXT:    v_writelane_b32 v23, s9, 47
-; CHECK-NEXT:    v_writelane_b32 v23, s10, 48
-; CHECK-NEXT:    v_writelane_b32 v23, s11, 49
-; CHECK-NEXT:    v_writelane_b32 v23, s12, 50
-; CHECK-NEXT:    v_writelane_b32 v23, s13, 51
-; CHECK-NEXT:    v_writelane_b32 v23, s14, 52
-; CHECK-NEXT:    v_writelane_b32 v23, s15, 53
+; CHECK-NEXT:    v_writelane_b32 v22, s0, 38
+; CHECK-NEXT:    v_writelane_b32 v22, s1, 39
+; CHECK-NEXT:    v_writelane_b32 v22, s2, 40
+; CHECK-NEXT:    v_writelane_b32 v22, s3, 41
+; CHECK-NEXT:    v_writelane_b32 v22, s4, 42
+; CHECK-NEXT:    v_writelane_b32 v22, s5, 43
+; CHECK-NEXT:    v_writelane_b32 v22, s6, 44
+; CHECK-NEXT:    v_writelane_b32 v22, s7, 45
+; CHECK-NEXT:    v_writelane_b32 v22, s8, 46
+; CHECK-NEXT:    v_writelane_b32 v22, s9, 47
+; CHECK-NEXT:    v_writelane_b32 v22, s10, 48
+; CHECK-NEXT:    v_writelane_b32 v22, s11, 49
+; CHECK-NEXT:    v_writelane_b32 v22, s12, 50
+; CHECK-NEXT:    v_writelane_b32 v22, s13, 51
+; CHECK-NEXT:    v_writelane_b32 v22, s14, 52
+; CHECK-NEXT:    v_writelane_b32 v22, s15, 53
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[34:35]
 ; CHECK-NEXT:    ;;#ASMEND
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[0:3]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s0, 54
-; CHECK-NEXT:    v_writelane_b32 v23, s1, 55
-; CHECK-NEXT:    v_writelane_b32 v23, s2, 56
-; CHECK-NEXT:    v_writelane_b32 v23, s3, 57
+; CHECK-NEXT:    v_writelane_b32 v22, s0, 54
+; CHECK-NEXT:    v_writelane_b32 v22, s1, 55
+; CHECK-NEXT:    v_writelane_b32 v22, s2, 56
+; CHECK-NEXT:    v_writelane_b32 v22, s3, 57
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[0:7]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s0, 58
-; CHECK-NEXT:    v_writelane_b32 v23, s1, 59
-; CHECK-NEXT:    v_writelane_b32 v23, s2, 60
-; CHECK-NEXT:    ; implicit-def: $vgpr0
-; CHECK-NEXT:    v_writelane_b32 v23, s3, 61
-; CHECK-NEXT:    v_writelane_b32 v23, s4, 62
-; CHECK-NEXT:    v_writelane_b32 v0, s6, 0
-; CHECK-NEXT:    v_writelane_b32 v23, s5, 63
-; CHECK-NEXT:    v_writelane_b32 v0, s7, 1
+; CHECK-NEXT:    v_writelane_b32 v22, s0, 58
+; CHECK-NEXT:    v_writelane_b32 v22, s1, 59
+; CHECK-NEXT:    v_writelane_b32 v22, s2, 60
+; CHECK-NEXT:    ; implicit-def: $vgpr23 : SGPR spill to VGPR lane
+; CHECK-NEXT:    v_writelane_b32 v22, s3, 61
+; CHECK-NEXT:    v_writelane_b32 v22, s4, 62
+; CHECK-NEXT:    v_writelane_b32 v23, s6, 0
+; CHECK-NEXT:    v_writelane_b32 v22, s5, 63
+; CHECK-NEXT:    v_writelane_b32 v23, s7, 1
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[0:15]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v0, s0, 2
-; CHECK-NEXT:    v_writelane_b32 v0, s1, 3
-; CHECK-NEXT:    v_writelane_b32 v0, s2, 4
-; CHECK-NEXT:    v_writelane_b32 v0, s3, 5
-; CHECK-NEXT:    v_writelane_b32 v0, s4, 6
-; CHECK-NEXT:    v_writelane_b32 v0, s5, 7
-; CHECK-NEXT:    v_writelane_b32 v0, s6, 8
-; CHECK-NEXT:    v_writelane_b32 v0, s7, 9
-; CHECK-NEXT:    v_writelane_b32 v0, s8, 10
-; CHECK-NEXT:    v_writelane_b32 v0, s9, 11
-; CHECK-NEXT:    v_writelane_b32 v0, s10, 12
-; CHECK-NEXT:    v_writelane_b32 v0, s11, 13
-; CHECK-NEXT:    v_writelane_b32 v0, s12, 14
-; CHECK-NEXT:    v_writelane_b32 v0, s13, 15
-; CHECK-NEXT:    v_writelane_b32 v0, s14, 16
-; CHECK-NEXT:    v_writelane_b32 v0, s15, 17
+; CHECK-NEXT:    v_writelane_b32 v23, s0, 2
+; CHECK-NEXT:    v_writelane_b32 v23, s1, 3
+; CHECK-NEXT:    v_writelane_b32 v23, s2, 4
+; CHECK-NEXT:    v_writelane_b32 v23, s3, 5
+; CHECK-NEXT:    v_writelane_b32 v23, s4, 6
+; CHECK-NEXT:    v_writelane_b32 v23, s5, 7
+; CHECK-NEXT:    v_writelane_b32 v23, s6, 8
+; CHECK-NEXT:    v_writelane_b32 v23, s7, 9
+; CHECK-NEXT:    v_writelane_b32 v23, s8, 10
+; CHECK-NEXT:    v_writelane_b32 v23, s9, 11
+; CHECK-NEXT:    v_writelane_b32 v23, s10, 12
+; CHECK-NEXT:    v_writelane_b32 v23, s11, 13
+; CHECK-NEXT:    v_writelane_b32 v23, s12, 14
+; CHECK-NEXT:    v_writelane_b32 v23, s13, 15
+; CHECK-NEXT:    v_writelane_b32 v23, s14, 16
+; CHECK-NEXT:    v_writelane_b32 v23, s15, 17
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[0:1]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v0, s0, 18
-; CHECK-NEXT:    v_writelane_b32 v0, s1, 19
+; CHECK-NEXT:    v_writelane_b32 v23, s0, 18
+; CHECK-NEXT:    v_writelane_b32 v23, s1, 19
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[0:3]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v0, s0, 20
-; CHECK-NEXT:    v_writelane_b32 v0, s1, 21
-; CHECK-NEXT:    v_writelane_b32 v0, s2, 22
-; CHECK-NEXT:    v_writelane_b32 v0, s3, 23
+; CHECK-NEXT:    v_writelane_b32 v23, s0, 20
+; CHECK-NEXT:    v_writelane_b32 v23, s1, 21
+; CHECK-NEXT:    v_writelane_b32 v23, s2, 22
+; CHECK-NEXT:    v_writelane_b32 v23, s3, 23
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[0:7]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v0, s0, 24
-; CHECK-NEXT:    v_writelane_b32 v0, s1, 25
-; CHECK-NEXT:    v_writelane_b32 v0, s2, 26
-; CHECK-NEXT:    v_writelane_b32 v0, s3, 27
-; CHECK-NEXT:    v_writelane_b32 v0, s4, 28
-; CHECK-NEXT:    v_writelane_b32 v0, s5, 29
-; CHECK-NEXT:    v_writelane_b32 v0, s6, 30
-; CHECK-NEXT:    v_writelane_b32 v0, s7, 31
+; CHECK-NEXT:    v_writelane_b32 v23, s0, 24
+; CHECK-NEXT:    v_writelane_b32 v23, s1, 25
+; CHECK-NEXT:    v_writelane_b32 v23, s2, 26
+; CHECK-NEXT:    v_writelane_b32 v23, s3, 27
+; CHECK-NEXT:    v_writelane_b32 v23, s4, 28
+; CHECK-NEXT:    v_writelane_b32 v23, s5, 29
+; CHECK-NEXT:    v_writelane_b32 v23, s6, 30
+; CHECK-NEXT:    v_writelane_b32 v23, s7, 31
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[0:15]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v0, s0, 32
-; CHECK-NEXT:    v_writelane_b32 v0, s1, 33
-; CHECK-NEXT:    v_writelane_b32 v0, s2, 34
-; CHECK-NEXT:    v_writelane_b32 v0, s3, 35
-; CHECK-NEXT:    v_writelane_b32 v0, s4, 36
-; CHECK-NEXT:    v_writelane_b32 v0, s5, 37
-; CHECK-NEXT:    v_writelane_b32 v0, s6, 38
-; CHECK-NEXT:    v_writelane_b32 v0, s7, 39
-; CHECK-NEXT:    v_writelane_b32 v0, s8, 40
-; CHECK-NEXT:    v_writelane_b32 v0, s9, 41
-; CHECK-NEXT:    v_writelane_b32 v0, s10, 42
-; CHECK-NEXT:    v_writelane_b32 v0, s11, 43
-; CHECK-NEXT:    v_writelane_b32 v0, s12, 44
-; CHECK-NEXT:    v_writelane_b32 v0, s13, 45
-; CHECK-NEXT:    v_writelane_b32 v0, s14, 46
-; CHECK-NEXT:    v_writelane_b32 v0, s15, 47
+; CHECK-NEXT:    v_writelane_b32 v23, s0, 32
+; CHECK-NEXT:    v_writelane_b32 v23, s1, 33
+; CHECK-NEXT:    v_writelane_b32 v23, s2, 34
+; CHECK-NEXT:    v_writelane_b32 v23, s3, 35
+; CHECK-NEXT:    v_writelane_b32 v23, s4, 36
+; CHECK-NEXT:    v_writelane_b32 v23, s5, 37
+; CHECK-NEXT:    v_writelane_b32 v23, s6, 38
+; CHECK-NEXT:    v_writelane_b32 v23, s7, 39
+; CHECK-NEXT:    v_writelane_b32 v23, s8, 40
+; CHECK-NEXT:    v_writelane_b32 v23, s9, 41
+; CHECK-NEXT:    v_writelane_b32 v23, s10, 42
+; CHECK-NEXT:    v_writelane_b32 v23, s11, 43
+; CHECK-NEXT:    v_writelane_b32 v23, s12, 44
+; CHECK-NEXT:    v_writelane_b32 v23, s13, 45
+; CHECK-NEXT:    v_writelane_b32 v23, s14, 46
+; CHECK-NEXT:    v_writelane_b32 v23, s15, 47
 ; CHECK-NEXT:    s_cbranch_scc0 .LBB0_2
 ; CHECK-NEXT:  ; %bb.1: ; %ret
-; CHECK-NEXT:    ; kill: killed $vgpr23
-; CHECK-NEXT:    ; kill: killed $vgpr0
 ; CHECK-NEXT:    s_endpgm
 ; CHECK-NEXT:  .LBB0_2: ; %bb0
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 0
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 1
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 0
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 1
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:1]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 2
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 3
-; CHECK-NEXT:    v_readlane_b32 s2, v23, 4
-; CHECK-NEXT:    v_readlane_b32 s3, v23, 5
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 2
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 3
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 4
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 5
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:3]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 6
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 7
-; CHECK-NEXT:    v_readlane_b32 s2, v23, 8
-; CHECK-NEXT:    v_readlane_b32 s3, v23, 9
-; CHECK-NEXT:    v_readlane_b32 s4, v23, 10
-; CHECK-NEXT:    v_readlane_b32 s5, v23, 11
-; CHECK-NEXT:    v_readlane_b32 s6, v23, 12
-; CHECK-NEXT:    v_readlane_b32 s7, v23, 13
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 6
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 7
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 8
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 9
+; CHECK-NEXT:    v_readlane_b32 s4, v22, 10
+; CHECK-NEXT:    v_readlane_b32 s5, v22, 11
+; CHECK-NEXT:    v_readlane_b32 s6, v22, 12
+; CHECK-NEXT:    v_readlane_b32 s7, v22, 13
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:7]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 14
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 15
-; CHECK-NEXT:    v_readlane_b32 s2, v23, 16
-; CHECK-NEXT:    v_readlane_b32 s3, v23, 17
-; CHECK-NEXT:    v_readlane_b32 s4, v23, 18
-; CHECK-NEXT:    v_readlane_b32 s5, v23, 19
-; CHECK-NEXT:    v_readlane_b32 s6, v23, 20
-; CHECK-NEXT:    v_readlane_b32 s7, v23, 21
-; CHECK-NEXT:    v_readlane_b32 s8, v23, 22
-; CHECK-NEXT:    v_readlane_b32 s9, v23, 23
-; CHECK-NEXT:    v_readlane_b32 s10, v23, 24
-; CHECK-NEXT:    v_readlane_b32 s11, v23, 25
-; CHECK-NEXT:    v_readlane_b32 s12, v23, 26
-; CHECK-NEXT:    v_readlane_b32 s13, v23, 27
-; CHECK-NEXT:    v_readlane_b32 s14, v23, 28
-; CHECK-NEXT:    v_readlane_b32 s15, v23, 29
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 14
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 15
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 16
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 17
+; CHECK-NEXT:    v_readlane_b32 s4, v22, 18
+; CHECK-NEXT:    v_readlane_b32 s5, v22, 19
+; CHECK-NEXT:    v_readlane_b32 s6, v22, 20
+; CHECK-NEXT:    v_readlane_b32 s7, v22, 21
+; CHECK-NEXT:    v_readlane_b32 s8, v22, 22
+; CHECK-NEXT:    v_readlane_b32 s9, v22, 23
+; CHECK-NEXT:    v_readlane_b32 s10, v22, 24
+; CHECK-NEXT:    v_readlane_b32 s11, v22, 25
+; CHECK-NEXT:    v_readlane_b32 s12, v22, 26
+; CHECK-NEXT:    v_readlane_b32 s13, v22, 27
+; CHECK-NEXT:    v_readlane_b32 s14, v22, 28
+; CHECK-NEXT:    v_readlane_b32 s15, v22, 29
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:15]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 30
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 31
-; CHECK-NEXT:    v_readlane_b32 s2, v23, 32
-; CHECK-NEXT:    v_readlane_b32 s3, v23, 33
-; CHECK-NEXT:    v_readlane_b32 s4, v23, 34
-; CHECK-NEXT:    v_readlane_b32 s5, v23, 35
-; CHECK-NEXT:    v_readlane_b32 s6, v23, 36
-; CHECK-NEXT:    v_readlane_b32 s7, v23, 37
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 30
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 31
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 32
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 33
+; CHECK-NEXT:    v_readlane_b32 s4, v22, 34
+; CHECK-NEXT:    v_readlane_b32 s5, v22, 35
+; CHECK-NEXT:    v_readlane_b32 s6, v22, 36
+; CHECK-NEXT:    v_readlane_b32 s7, v22, 37
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[42:43]
 ; CHECK-NEXT:    ;;#ASMEND
@@ -262,10 +256,10 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:7]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 38
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 39
-; CHECK-NEXT:    v_readlane_b32 s2, v23, 40
-; CHECK-NEXT:    v_readlane_b32 s3, v23, 41
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 38
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 39
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 40
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 41
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[16:31]
 ; CHECK-NEXT:    ;;#ASMEND
@@ -278,111 +272,108 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[44:51]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s4, v23, 42
-; CHECK-NEXT:    v_readlane_b32 s5, v23, 43
-; CHECK-NEXT:    v_readlane_b32 s6, v23, 44
-; CHECK-NEXT:    v_readlane_b32 s7, v23, 45
-; CHECK-NEXT:    v_readlane_b32 s8, v23, 46
-; CHECK-NEXT:    v_readlane_b32 s9, v23, 47
-; CHECK-NEXT:    v_readlane_b32 s10, v23, 48
-; CHECK-NEXT:    v_readlane_b32 s11, v23, 49
-; CHECK-NEXT:    v_readlane_b32 s12, v23, 50
-; CHECK-NEXT:    v_readlane_b32 s13, v23, 51
-; CHECK-NEXT:    v_readlane_b32 s14, v23, 52
-; CHECK-NEXT:    v_readlane_b32 s15, v23, 53
+; CHECK-NEXT:    v_readlane_b32 s4, v22, 42
+; CHECK-NEXT:    v_readlane_b32 s5, v22, 43
+; CHECK-NEXT:    v_readlane_b32 s6, v22, 44
+; CHECK-NEXT:    v_readlane_b32 s7, v22, 45
+; CHECK-NEXT:    v_readlane_b32 s8, v22, 46
+; CHECK-NEXT:    v_readlane_b32 s9, v22, 47
+; CHECK-NEXT:    v_readlane_b32 s10, v22, 48
+; CHECK-NEXT:    v_readlane_b32 s11, v22, 49
+; CHECK-NEXT:    v_readlane_b32 s12, v22, 50
+; CHECK-NEXT:    v_readlane_b32 s13, v22, 51
+; CHECK-NEXT:    v_readlane_b32 s14, v22, 52
+; CHECK-NEXT:    v_readlane_b32 s15, v22, 53
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:15]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 54
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 55
-; CHECK-NEXT:    v_readlane_b32 s2, v23, 56
-; CHECK-NEXT:    v_readlane_b32 s3, v23, 57
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 54
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 55
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 56
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 57
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[34:35]
 ; CHECK-NEXT:    ;;#ASMEND
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:3]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 58
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 59
-; CHECK-NEXT:    v_readlane_b32 s2, v23, 60
-; CHECK-NEXT:    v_readlane_b32 s3, v23, 61
-; CHECK-NEXT:    v_readlane_b32 s4, v23, 62
-; CHECK-NEXT:    v_readlane_b32 s5, v23, 63
-; CHECK-NEXT:    v_readlane_b32 s6, v0, 0
-; CHECK-NEXT:    v_readlane_b32 s7, v0, 1
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 58
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 59
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 60
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 61
+; CHECK-NEXT:    v_readlane_b32 s4, v22, 62
+; CHECK-NEXT:    v_readlane_b32 s5, v22, 63
+; CHECK-NEXT:    v_readlane_b32 s6, v23, 0
+; CHECK-NEXT:    v_readlane_b32 s7, v23, 1
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:7]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v0, 2
-; CHECK-NEXT:    v_readlane_b32 s1, v0, 3
-; CHECK-NEXT:    v_readlane_b32 s2, v0, 4
-; CHECK-NEXT:    v_readlane_b32 s3, v0, 5
-; CHECK-NEXT:    v_readlane_b32 s4, v0, 6
-; CHECK-NEXT:    v_readlane_b32 s5, v0, 7
-; CHECK-NEXT:    v_readlane_b32 s6, v0, 8
-; CHECK-NEXT:    v_readlane_b32 s7, v0, 9
-; CHECK-NEXT:    v_readlane_b32 s8, v0, 10
-; CHECK-NEXT:    v_readlane_b32 s9, v0, 11
-; CHECK-NEXT:    v_readlane_b32 s10, v0, 12
-; CHECK-NEXT:    v_readlane_b32 s11, v0, 13
-; CHECK-NEXT:    v_readlane_b32 s12, v0, 14
-; CHECK-NEXT:    v_readlane_b32 s13, v0, 15
-; CHECK-NEXT:    v_readlane_b32 s14, v0, 16
-; CHECK-NEXT:    v_readlane_b32 s15, v0, 17
+; CHECK-NEXT:    v_readlane_b32 s0, v23, 2
+; CHECK-NEXT:    v_readlane_b32 s1, v23, 3
+; CHECK-NEXT:    v_readlane_b32 s2, v23, 4
+; CHECK-NEXT:    v_readlane_b32 s3, v23, 5
+; CHECK-NEXT:    v_readlane_b32 s4, v23, 6
+; CHECK-NEXT:    v_readlane_b32 s5, v23, 7
+; CHECK-NEXT:    v_readlane_b32 s6, v23, 8
+; CHECK-NEXT:    v_readlane_b32 s7, v23, 9
+; CHECK-NEXT:    v_readlane_b32 s8, v23, 10
+; CHECK-NEXT:    v_readlane_b32 s9, v23, 11
+; CHECK-NEXT:    v_readlane_b32 s10, v23, 12
+; CHECK-NEXT:    v_readlane_b32 s11, v23, 13
+; CHECK-NEXT:    v_readlane_b32 s12, v23, 14
+; CHECK-NEXT:    v_readlane_b32 s13, v23, 15
+; CHECK-NEXT:    v_readlane_b32 s14, v23, 16
+; CHECK-NEXT:    v_readlane_b32 s15, v23, 17
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:15]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v0, 18
-; CHECK-NEXT:    v_readlane_b32 s1, v0, 19
+; CHECK-NEXT:    v_readlane_b32 s0, v23, 18
+; CHECK-NEXT:    v_readlane_b32 s1, v23, 19
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:1]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v0, 20
-; CHECK-NEXT:    v_readlane_b32 s1, v0, 21
-; CHECK-NEXT:    v_readlane_b32 s2, v0, 22
-; CHECK-NEXT:    v_readlane_b32 s3, v0, 23
+; CHECK-NEXT:    v_readlane_b32 s0, v23, 20
+; CHECK-NEXT:    v_readlane_b32 s1, v23, 21
+; CHECK-NEXT:    v_readlane_b32 s2, v23, 22
+; CHECK-NEXT:    v_readlane_b32 s3, v23, 23
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:3]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v0, 24
-; CHECK-NEXT:    v_readlane_b32 s1, v0, 25
-; CHECK-NEXT:    v_readlane_b32 s2, v0, 26
-; CHECK-NEXT:    v_readlane_b32 s3, v0, 27
-; CHECK-NEXT:    v_readlane_b32 s4, v0, 28
-; CHECK-NEXT:    v_readlane_b32 s5, v0, 29
-; CHECK-NEXT:    v_readlane_b32 s6, v0, 30
-; CHECK-NEXT:    v_readlane_b32 s7, v0, 31
+; CHECK-NEXT:    v_readlane_b32 s0, v23, 24
+; CHECK-NEXT:    v_readlane_b32 s1, v23, 25
+; CHECK-NEXT:    v_readlane_b32 s2, v23, 26
+; CHECK-NEXT:    v_readlane_b32 s3, v23, 27
+; CHECK-NEXT:    v_readlane_b32 s4, v23, 28
+; CHECK-NEXT:    v_readlane_b32 s5, v23, 29
+; CHECK-NEXT:    v_readlane_b32 s6, v23, 30
+; CHECK-NEXT:    v_readlane_b32 s7, v23, 31
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:7]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v0, 32
-; CHECK-NEXT:    v_readlane_b32 s1, v0, 33
-; CHECK-NEXT:    v_readlane_b32 s2, v0, 34
-; CHECK-NEXT:    v_readlane_b32 s3, v0, 35
-; CHECK-NEXT:    v_readlane_b32 s4, v0, 36
-; CHECK-NEXT:    v_readlane_b32 s5, v0, 37
-; CHECK-NEXT:    v_readlane_b32 s6, v0, 38
-; CHECK-NEXT:    v_readlane_b32 s7, v0, 39
-; CHECK-NEXT:    v_readlane_b32 s8, v0, 40
-; CHECK-NEXT:    v_readlane_b32 s9, v0, 41
-; CHECK-NEXT:    v_readlane_b32 s10, v0, 42
-; CHECK-NEXT:    v_readlane_b32 s11, v0, 43
-; CHECK-NEXT:    v_readlane_b32 s12, v0, 44
-; CHECK-NEXT:    v_readlane_b32 s13, v0, 45
-; CHECK-NEXT:    v_readlane_b32 s14, v0, 46
-; CHECK-NEXT:    v_readlane_b32 s15, v0, 47
+; CHECK-NEXT:    v_readlane_b32 s0, v23, 32
+; CHECK-NEXT:    v_readlane_b32 s1, v23, 33
+; CHECK-NEXT:    v_readlane_b32 s2, v23, 34
+; CHECK-NEXT:    v_readlane_b32 s3, v23, 35
+; CHECK-NEXT:    v_readlane_b32 s4, v23, 36
+; CHECK-NEXT:    v_readlane_b32 s5, v23, 37
+; CHECK-NEXT:    v_readlane_b32 s6, v23, 38
+; CHECK-NEXT:    v_readlane_b32 s7, v23, 39
+; CHECK-NEXT:    v_readlane_b32 s8, v23, 40
+; CHECK-NEXT:    v_readlane_b32 s9, v23, 41
+; CHECK-NEXT:    v_readlane_b32 s10, v23, 42
+; CHECK-NEXT:    v_readlane_b32 s11, v23, 43
+; CHECK-NEXT:    v_readlane_b32 s12, v23, 44
+; CHECK-NEXT:    v_readlane_b32 s13, v23, 45
+; CHECK-NEXT:    v_readlane_b32 s14, v23, 46
+; CHECK-NEXT:    v_readlane_b32 s15, v23, 47
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; use s[0:15]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ; kill: killed $vgpr23
-; CHECK-NEXT:    ; kill: killed $vgpr0
 ; CHECK-NEXT:    s_endpgm
   call void asm sideeffect "", "~{v[0:7]}" () #0
   call void asm sideeffect "", "~{v[8:15]}" () #0
   call void asm sideeffect "", "~{v[16:19]}"() #0
   call void asm sideeffect "", "~{v[20:21]}"() #0
-  call void asm sideeffect "", "~{v22}"() #0
 
   %val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
   %val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
index a91c047093b95..0af4b0c6269e7 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
@@ -1,17 +1,372 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
 
-; XFAIL: *
-
 ; This tests for a bug that caused a crash in SIRegisterInfo::spillSGPR()
 ; which was due to incorrect book-keeping of removed dead frame indices.
 
 ; CHECK-LABEL: {{^}}kernel0:
 define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
+; CHECK-LABEL: kernel0:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[34:35]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[0:3]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[24:31]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[8:23]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[4:5]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[36:39]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:47]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; CHECK-NEXT:    ; implicit-def: $vgpr23 : SGPR spill to VGPR lane
+; CHECK-NEXT:    v_writelane_b32 v22, s40, 0
+; CHECK-NEXT:    v_writelane_b32 v22, s41, 1
+; CHECK-NEXT:    v_writelane_b32 v22, s42, 2
+; CHECK-NEXT:    v_writelane_b32 v22, s43, 3
+; CHECK-NEXT:    v_writelane_b32 v22, s44, 4
+; CHECK-NEXT:    v_writelane_b32 v22, s45, 5
+; CHECK-NEXT:    v_writelane_b32 v22, s46, 6
+; CHECK-NEXT:    v_writelane_b32 v22, s47, 7
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:55]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v22, s40, 8
+; CHECK-NEXT:    v_writelane_b32 v22, s41, 9
+; CHECK-NEXT:    v_writelane_b32 v22, s42, 10
+; CHECK-NEXT:    v_writelane_b32 v22, s43, 11
+; CHECK-NEXT:    v_writelane_b32 v22, s44, 12
+; CHECK-NEXT:    v_writelane_b32 v22, s45, 13
+; CHECK-NEXT:    v_writelane_b32 v22, s46, 14
+; CHECK-NEXT:    v_writelane_b32 v22, s47, 15
+; CHECK-NEXT:    v_writelane_b32 v22, s48, 16
+; CHECK-NEXT:    v_writelane_b32 v22, s49, 17
+; CHECK-NEXT:    v_writelane_b32 v22, s50, 18
+; CHECK-NEXT:    v_writelane_b32 v22, s51, 19
+; CHECK-NEXT:    v_writelane_b32 v22, s52, 20
+; CHECK-NEXT:    v_writelane_b32 v22, s53, 21
+; CHECK-NEXT:    v_writelane_b32 v22, s54, 22
+; CHECK-NEXT:    v_writelane_b32 v22, s55, 23
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[6:7]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:43]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v22, s40, 24
+; CHECK-NEXT:    v_writelane_b32 v22, s41, 25
+; CHECK-NEXT:    v_writelane_b32 v22, s42, 26
+; CHECK-NEXT:    v_writelane_b32 v22, s43, 27
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:47]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v22, s40, 28
+; CHECK-NEXT:    v_writelane_b32 v22, s41, 29
+; CHECK-NEXT:    v_writelane_b32 v22, s42, 30
+; CHECK-NEXT:    v_writelane_b32 v22, s43, 31
+; CHECK-NEXT:    v_writelane_b32 v22, s44, 32
+; CHECK-NEXT:    v_writelane_b32 v22, s45, 33
+; CHECK-NEXT:    v_writelane_b32 v22, s46, 34
+; CHECK-NEXT:    v_writelane_b32 v22, s47, 35
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:55]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v22, s40, 36
+; CHECK-NEXT:    v_writelane_b32 v22, s41, 37
+; CHECK-NEXT:    v_writelane_b32 v22, s42, 38
+; CHECK-NEXT:    v_writelane_b32 v22, s43, 39
+; CHECK-NEXT:    v_writelane_b32 v22, s44, 40
+; CHECK-NEXT:    v_writelane_b32 v22, s45, 41
+; CHECK-NEXT:    v_writelane_b32 v22, s46, 42
+; CHECK-NEXT:    v_writelane_b32 v22, s47, 43
+; CHECK-NEXT:    v_writelane_b32 v22, s48, 44
+; CHECK-NEXT:    v_writelane_b32 v22, s49, 45
+; CHECK-NEXT:    v_writelane_b32 v22, s50, 46
+; CHECK-NEXT:    v_writelane_b32 v22, s51, 47
+; CHECK-NEXT:    v_writelane_b32 v22, s52, 48
+; CHECK-NEXT:    v_writelane_b32 v22, s53, 49
+; CHECK-NEXT:    v_writelane_b32 v22, s54, 50
+; CHECK-NEXT:    v_writelane_b32 v22, s55, 51
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:41]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v22, s40, 52
+; CHECK-NEXT:    v_writelane_b32 v22, s41, 53
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:43]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v22, s40, 54
+; CHECK-NEXT:    v_writelane_b32 v22, s41, 55
+; CHECK-NEXT:    v_writelane_b32 v22, s42, 56
+; CHECK-NEXT:    v_writelane_b32 v22, s43, 57
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:47]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v22, s40, 58
+; CHECK-NEXT:    v_writelane_b32 v22, s41, 59
+; CHECK-NEXT:    v_writelane_b32 v22, s42, 60
+; CHECK-NEXT:    v_writelane_b32 v22, s43, 61
+; CHECK-NEXT:    v_writelane_b32 v22, s44, 62
+; CHECK-NEXT:    v_writelane_b32 v23, s46, 0
+; CHECK-NEXT:    v_writelane_b32 v22, s45, 63
+; CHECK-NEXT:    v_writelane_b32 v23, s47, 1
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:55]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v23, s40, 2
+; CHECK-NEXT:    v_writelane_b32 v23, s41, 3
+; CHECK-NEXT:    v_writelane_b32 v23, s42, 4
+; CHECK-NEXT:    v_writelane_b32 v23, s43, 5
+; CHECK-NEXT:    v_writelane_b32 v23, s44, 6
+; CHECK-NEXT:    v_writelane_b32 v23, s45, 7
+; CHECK-NEXT:    v_writelane_b32 v23, s46, 8
+; CHECK-NEXT:    v_writelane_b32 v23, s47, 9
+; CHECK-NEXT:    v_writelane_b32 v23, s48, 10
+; CHECK-NEXT:    v_writelane_b32 v23, s49, 11
+; CHECK-NEXT:    v_writelane_b32 v23, s50, 12
+; CHECK-NEXT:    v_writelane_b32 v23, s51, 13
+; CHECK-NEXT:    v_writelane_b32 v23, s52, 14
+; CHECK-NEXT:    v_writelane_b32 v23, s53, 15
+; CHECK-NEXT:    v_writelane_b32 v23, s54, 16
+; CHECK-NEXT:    v_writelane_b32 v23, s55, 17
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:41]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v23, s40, 18
+; CHECK-NEXT:    v_writelane_b32 v23, s41, 19
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:43]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v23, s40, 20
+; CHECK-NEXT:    v_writelane_b32 v23, s41, 21
+; CHECK-NEXT:    v_writelane_b32 v23, s42, 22
+; CHECK-NEXT:    v_writelane_b32 v23, s43, 23
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:47]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v23, s40, 24
+; CHECK-NEXT:    v_writelane_b32 v23, s41, 25
+; CHECK-NEXT:    v_writelane_b32 v23, s42, 26
+; CHECK-NEXT:    v_writelane_b32 v23, s43, 27
+; CHECK-NEXT:    v_writelane_b32 v23, s44, 28
+; CHECK-NEXT:    v_writelane_b32 v23, s45, 29
+; CHECK-NEXT:    v_writelane_b32 v23, s46, 30
+; CHECK-NEXT:    v_writelane_b32 v23, s47, 31
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s[40:55]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v23, s40, 32
+; CHECK-NEXT:    v_writelane_b32 v23, s41, 33
+; CHECK-NEXT:    v_writelane_b32 v23, s42, 34
+; CHECK-NEXT:    v_writelane_b32 v23, s43, 35
+; CHECK-NEXT:    v_writelane_b32 v23, s44, 36
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[34:35]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:3]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[24:31]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[8:23]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s8, v22, 0
+; CHECK-NEXT:    v_writelane_b32 v23, s45, 37
+; CHECK-NEXT:    v_readlane_b32 s9, v22, 1
+; CHECK-NEXT:    v_readlane_b32 s10, v22, 2
+; CHECK-NEXT:    v_readlane_b32 s11, v22, 3
+; CHECK-NEXT:    v_readlane_b32 s12, v22, 4
+; CHECK-NEXT:    v_readlane_b32 s13, v22, 5
+; CHECK-NEXT:    v_readlane_b32 s14, v22, 6
+; CHECK-NEXT:    v_readlane_b32 s15, v22, 7
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 24
+; CHECK-NEXT:    v_writelane_b32 v23, s46, 38
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[4:5]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[36:39]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[8:15]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s8, v22, 8
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 25
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 26
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 27
+; CHECK-NEXT:    v_writelane_b32 v23, s47, 39
+; CHECK-NEXT:    v_readlane_b32 s9, v22, 9
+; CHECK-NEXT:    v_readlane_b32 s10, v22, 10
+; CHECK-NEXT:    v_readlane_b32 s11, v22, 11
+; CHECK-NEXT:    v_readlane_b32 s12, v22, 12
+; CHECK-NEXT:    v_readlane_b32 s13, v22, 13
+; CHECK-NEXT:    v_readlane_b32 s14, v22, 14
+; CHECK-NEXT:    v_readlane_b32 s15, v22, 15
+; CHECK-NEXT:    v_readlane_b32 s16, v22, 16
+; CHECK-NEXT:    v_readlane_b32 s17, v22, 17
+; CHECK-NEXT:    v_readlane_b32 s18, v22, 18
+; CHECK-NEXT:    v_readlane_b32 s19, v22, 19
+; CHECK-NEXT:    v_readlane_b32 s20, v22, 20
+; CHECK-NEXT:    v_readlane_b32 s21, v22, 21
+; CHECK-NEXT:    v_readlane_b32 s22, v22, 22
+; CHECK-NEXT:    v_readlane_b32 s23, v22, 23
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[8:23]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[6:7]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:3]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 28
+; CHECK-NEXT:    v_writelane_b32 v23, s48, 40
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 29
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 30
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 31
+; CHECK-NEXT:    v_readlane_b32 s4, v22, 32
+; CHECK-NEXT:    v_readlane_b32 s5, v22, 33
+; CHECK-NEXT:    v_readlane_b32 s6, v22, 34
+; CHECK-NEXT:    v_readlane_b32 s7, v22, 35
+; CHECK-NEXT:    v_writelane_b32 v23, s49, 41
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:7]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 36
+; CHECK-NEXT:    v_writelane_b32 v23, s50, 42
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 37
+; CHECK-NEXT:    v_writelane_b32 v23, s51, 43
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 38
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 39
+; CHECK-NEXT:    v_readlane_b32 s4, v22, 40
+; CHECK-NEXT:    v_readlane_b32 s5, v22, 41
+; CHECK-NEXT:    v_readlane_b32 s6, v22, 42
+; CHECK-NEXT:    v_readlane_b32 s7, v22, 43
+; CHECK-NEXT:    v_readlane_b32 s8, v22, 44
+; CHECK-NEXT:    v_readlane_b32 s9, v22, 45
+; CHECK-NEXT:    v_readlane_b32 s10, v22, 46
+; CHECK-NEXT:    v_readlane_b32 s11, v22, 47
+; CHECK-NEXT:    v_readlane_b32 s12, v22, 48
+; CHECK-NEXT:    v_readlane_b32 s13, v22, 49
+; CHECK-NEXT:    v_readlane_b32 s14, v22, 50
+; CHECK-NEXT:    v_readlane_b32 s15, v22, 51
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:15]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 52
+; CHECK-NEXT:    v_writelane_b32 v23, s52, 44
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 53
+; CHECK-NEXT:    v_writelane_b32 v23, s53, 45
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:1]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 54
+; CHECK-NEXT:    v_writelane_b32 v23, s54, 46
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 55
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 56
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 57
+; CHECK-NEXT:    v_writelane_b32 v23, s55, 47
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:3]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s0, v22, 58
+; CHECK-NEXT:    v_readlane_b32 s1, v22, 59
+; CHECK-NEXT:    v_readlane_b32 s2, v22, 60
+; CHECK-NEXT:    v_readlane_b32 s3, v22, 61
+; CHECK-NEXT:    v_readlane_b32 s4, v22, 62
+; CHECK-NEXT:    v_readlane_b32 s5, v22, 63
+; CHECK-NEXT:    v_readlane_b32 s6, v23, 0
+; CHECK-NEXT:    v_readlane_b32 s7, v23, 1
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:7]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s0, v23, 2
+; CHECK-NEXT:    v_readlane_b32 s1, v23, 3
+; CHECK-NEXT:    v_readlane_b32 s2, v23, 4
+; CHECK-NEXT:    v_readlane_b32 s3, v23, 5
+; CHECK-NEXT:    v_readlane_b32 s4, v23, 6
+; CHECK-NEXT:    v_readlane_b32 s5, v23, 7
+; CHECK-NEXT:    v_readlane_b32 s6, v23, 8
+; CHECK-NEXT:    v_readlane_b32 s7, v23, 9
+; CHECK-NEXT:    v_readlane_b32 s8, v23, 10
+; CHECK-NEXT:    v_readlane_b32 s9, v23, 11
+; CHECK-NEXT:    v_readlane_b32 s10, v23, 12
+; CHECK-NEXT:    v_readlane_b32 s11, v23, 13
+; CHECK-NEXT:    v_readlane_b32 s12, v23, 14
+; CHECK-NEXT:    v_readlane_b32 s13, v23, 15
+; CHECK-NEXT:    v_readlane_b32 s14, v23, 16
+; CHECK-NEXT:    v_readlane_b32 s15, v23, 17
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:15]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s0, v23, 18
+; CHECK-NEXT:    v_readlane_b32 s1, v23, 19
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:1]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s0, v23, 20
+; CHECK-NEXT:    v_readlane_b32 s1, v23, 21
+; CHECK-NEXT:    v_readlane_b32 s2, v23, 22
+; CHECK-NEXT:    v_readlane_b32 s3, v23, 23
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:3]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s0, v23, 24
+; CHECK-NEXT:    v_readlane_b32 s1, v23, 25
+; CHECK-NEXT:    v_readlane_b32 s2, v23, 26
+; CHECK-NEXT:    v_readlane_b32 s3, v23, 27
+; CHECK-NEXT:    v_readlane_b32 s4, v23, 28
+; CHECK-NEXT:    v_readlane_b32 s5, v23, 29
+; CHECK-NEXT:    v_readlane_b32 s6, v23, 30
+; CHECK-NEXT:    v_readlane_b32 s7, v23, 31
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:7]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s0, v23, 32
+; CHECK-NEXT:    v_readlane_b32 s1, v23, 33
+; CHECK-NEXT:    v_readlane_b32 s2, v23, 34
+; CHECK-NEXT:    v_readlane_b32 s3, v23, 35
+; CHECK-NEXT:    v_readlane_b32 s4, v23, 36
+; CHECK-NEXT:    v_readlane_b32 s5, v23, 37
+; CHECK-NEXT:    v_readlane_b32 s6, v23, 38
+; CHECK-NEXT:    v_readlane_b32 s7, v23, 39
+; CHECK-NEXT:    v_readlane_b32 s8, v23, 40
+; CHECK-NEXT:    v_readlane_b32 s9, v23, 41
+; CHECK-NEXT:    v_readlane_b32 s10, v23, 42
+; CHECK-NEXT:    v_readlane_b32 s11, v23, 43
+; CHECK-NEXT:    v_readlane_b32 s12, v23, 44
+; CHECK-NEXT:    v_readlane_b32 s13, v23, 45
+; CHECK-NEXT:    v_readlane_b32 s14, v23, 46
+; CHECK-NEXT:    v_readlane_b32 s15, v23, 47
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use s[0:15]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_endpgm
   call void asm sideeffect "", "~{v[0:7]}" () #0
   call void asm sideeffect "", "~{v[8:15]}" () #0
   call void asm sideeffect "", "~{v[16:19]}"() #0
   call void asm sideeffect "", "~{v[20:21]}"() #0
-  call void asm sideeffect "", "~{v22}"() #0
   %val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
   %val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
   %val2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
index 9a75b6258433a..59036c64c8afc 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-; XFAIL: *
-
 ; The first 64 SGPR spills can go to a VGPR, but there isn't a second
 ; so some spills must be to memory. The last 16 element spill runs out of lanes at the 15th element.
 
@@ -11,19 +9,9 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_add_u32 s0, s0, s13
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT:    s_load_dword s4, s[6:7], 0x2
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[24:25]
-; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[24:25]
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:    ;;#ASMSTART
@@ -33,91 +21,91 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[8:23]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_waitcnt vmcnt(1)
-; GCN-NEXT:    v_writelane_b32 v1, s8, 0
-; GCN-NEXT:    v_writelane_b32 v1, s9, 1
-; GCN-NEXT:    v_writelane_b32 v1, s10, 2
-; GCN-NEXT:    v_writelane_b32 v1, s11, 3
-; GCN-NEXT:    v_writelane_b32 v1, s12, 4
-; GCN-NEXT:    v_writelane_b32 v1, s13, 5
-; GCN-NEXT:    v_writelane_b32 v1, s14, 6
-; GCN-NEXT:    v_writelane_b32 v1, s15, 7
-; GCN-NEXT:    v_writelane_b32 v1, s16, 8
-; GCN-NEXT:    v_writelane_b32 v1, s17, 9
-; GCN-NEXT:    v_writelane_b32 v1, s18, 10
-; GCN-NEXT:    v_writelane_b32 v1, s19, 11
-; GCN-NEXT:    v_writelane_b32 v1, s20, 12
-; GCN-NEXT:    v_writelane_b32 v1, s21, 13
-; GCN-NEXT:    v_writelane_b32 v1, s22, 14
-; GCN-NEXT:    v_writelane_b32 v1, s23, 15
+; GCN-NEXT:    ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GCN-NEXT:    v_writelane_b32 v22, s8, 0
+; GCN-NEXT:    v_writelane_b32 v22, s9, 1
+; GCN-NEXT:    v_writelane_b32 v22, s10, 2
+; GCN-NEXT:    v_writelane_b32 v22, s11, 3
+; GCN-NEXT:    v_writelane_b32 v22, s12, 4
+; GCN-NEXT:    v_writelane_b32 v22, s13, 5
+; GCN-NEXT:    v_writelane_b32 v22, s14, 6
+; GCN-NEXT:    v_writelane_b32 v22, s15, 7
+; GCN-NEXT:    v_writelane_b32 v22, s16, 8
+; GCN-NEXT:    v_writelane_b32 v22, s17, 9
+; GCN-NEXT:    v_writelane_b32 v22, s18, 10
+; GCN-NEXT:    v_writelane_b32 v22, s19, 11
+; GCN-NEXT:    v_writelane_b32 v22, s20, 12
+; GCN-NEXT:    v_writelane_b32 v22, s21, 13
+; GCN-NEXT:    v_writelane_b32 v22, s22, 14
+; GCN-NEXT:    v_writelane_b32 v22, s23, 15
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[8:23]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s8, 16
-; GCN-NEXT:    v_writelane_b32 v1, s9, 17
-; GCN-NEXT:    v_writelane_b32 v1, s10, 18
-; GCN-NEXT:    v_writelane_b32 v1, s11, 19
-; GCN-NEXT:    v_writelane_b32 v1, s12, 20
-; GCN-NEXT:    v_writelane_b32 v1, s13, 21
-; GCN-NEXT:    v_writelane_b32 v1, s14, 22
-; GCN-NEXT:    v_writelane_b32 v1, s15, 23
-; GCN-NEXT:    v_writelane_b32 v1, s16, 24
-; GCN-NEXT:    v_writelane_b32 v1, s17, 25
-; GCN-NEXT:    v_writelane_b32 v1, s18, 26
-; GCN-NEXT:    v_writelane_b32 v1, s19, 27
-; GCN-NEXT:    v_writelane_b32 v1, s20, 28
-; GCN-NEXT:    v_writelane_b32 v1, s21, 29
-; GCN-NEXT:    v_writelane_b32 v1, s22, 30
-; GCN-NEXT:    v_writelane_b32 v1, s23, 31
+; GCN-NEXT:    v_writelane_b32 v22, s8, 16
+; GCN-NEXT:    v_writelane_b32 v22, s9, 17
+; GCN-NEXT:    v_writelane_b32 v22, s10, 18
+; GCN-NEXT:    v_writelane_b32 v22, s11, 19
+; GCN-NEXT:    v_writelane_b32 v22, s12, 20
+; GCN-NEXT:    v_writelane_b32 v22, s13, 21
+; GCN-NEXT:    v_writelane_b32 v22, s14, 22
+; GCN-NEXT:    v_writelane_b32 v22, s15, 23
+; GCN-NEXT:    v_writelane_b32 v22, s16, 24
+; GCN-NEXT:    v_writelane_b32 v22, s17, 25
+; GCN-NEXT:    v_writelane_b32 v22, s18, 26
+; GCN-NEXT:    v_writelane_b32 v22, s19, 27
+; GCN-NEXT:    v_writelane_b32 v22, s20, 28
+; GCN-NEXT:    v_writelane_b32 v22, s21, 29
+; GCN-NEXT:    v_writelane_b32 v22, s22, 30
+; GCN-NEXT:    v_writelane_b32 v22, s23, 31
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[8:23]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s8, 32
-; GCN-NEXT:    v_writelane_b32 v1, s9, 33
-; GCN-NEXT:    v_writelane_b32 v1, s10, 34
-; GCN-NEXT:    v_writelane_b32 v1, s11, 35
-; GCN-NEXT:    v_writelane_b32 v1, s12, 36
-; GCN-NEXT:    v_writelane_b32 v1, s13, 37
-; GCN-NEXT:    v_writelane_b32 v1, s14, 38
-; GCN-NEXT:    v_writelane_b32 v1, s15, 39
-; GCN-NEXT:    v_writelane_b32 v1, s16, 40
-; GCN-NEXT:    v_writelane_b32 v1, s17, 41
-; GCN-NEXT:    v_writelane_b32 v1, s18, 42
-; GCN-NEXT:    v_writelane_b32 v1, s19, 43
-; GCN-NEXT:    v_writelane_b32 v1, s20, 44
-; GCN-NEXT:    v_writelane_b32 v1, s21, 45
-; GCN-NEXT:    v_writelane_b32 v1, s22, 46
-; GCN-NEXT:    v_writelane_b32 v1, s23, 47
+; GCN-NEXT:    v_writelane_b32 v22, s8, 32
+; GCN-NEXT:    v_writelane_b32 v22, s9, 33
+; GCN-NEXT:    v_writelane_b32 v22, s10, 34
+; GCN-NEXT:    v_writelane_b32 v22, s11, 35
+; GCN-NEXT:    v_writelane_b32 v22, s12, 36
+; GCN-NEXT:    v_writelane_b32 v22, s13, 37
+; GCN-NEXT:    v_writelane_b32 v22, s14, 38
+; GCN-NEXT:    v_writelane_b32 v22, s15, 39
+; GCN-NEXT:    v_writelane_b32 v22, s16, 40
+; GCN-NEXT:    v_writelane_b32 v22, s17, 41
+; GCN-NEXT:    v_writelane_b32 v22, s18, 42
+; GCN-NEXT:    v_writelane_b32 v22, s19, 43
+; GCN-NEXT:    v_writelane_b32 v22, s20, 44
+; GCN-NEXT:    v_writelane_b32 v22, s21, 45
+; GCN-NEXT:    v_writelane_b32 v22, s22, 46
+; GCN-NEXT:    v_writelane_b32 v22, s23, 47
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[8:23]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v1, s8, 48
-; GCN-NEXT:    v_writelane_b32 v1, s9, 49
-; GCN-NEXT:    v_writelane_b32 v1, s10, 50
-; GCN-NEXT:    v_writelane_b32 v1, s11, 51
-; GCN-NEXT:    v_writelane_b32 v1, s12, 52
-; GCN-NEXT:    v_writelane_b32 v1, s13, 53
-; GCN-NEXT:    v_writelane_b32 v1, s14, 54
-; GCN-NEXT:    v_writelane_b32 v1, s15, 55
-; GCN-NEXT:    v_writelane_b32 v1, s16, 56
-; GCN-NEXT:    v_writelane_b32 v1, s17, 57
-; GCN-NEXT:    v_writelane_b32 v1, s18, 58
-; GCN-NEXT:    v_writelane_b32 v1, s19, 59
-; GCN-NEXT:    v_writelane_b32 v1, s20, 60
-; GCN-NEXT:    v_writelane_b32 v1, s21, 61
-; GCN-NEXT:    v_writelane_b32 v1, s22, 62
-; GCN-NEXT:    v_writelane_b32 v1, s23, 63
+; GCN-NEXT:    v_writelane_b32 v22, s8, 48
+; GCN-NEXT:    v_writelane_b32 v22, s9, 49
+; GCN-NEXT:    v_writelane_b32 v22, s10, 50
+; GCN-NEXT:    v_writelane_b32 v22, s11, 51
+; GCN-NEXT:    v_writelane_b32 v22, s12, 52
+; GCN-NEXT:    v_writelane_b32 v22, s13, 53
+; GCN-NEXT:    v_writelane_b32 v22, s14, 54
+; GCN-NEXT:    v_writelane_b32 v22, s15, 55
+; GCN-NEXT:    v_writelane_b32 v22, s16, 56
+; GCN-NEXT:    v_writelane_b32 v22, s17, 57
+; GCN-NEXT:    v_writelane_b32 v22, s18, 58
+; GCN-NEXT:    v_writelane_b32 v22, s19, 59
+; GCN-NEXT:    v_writelane_b32 v22, s20, 60
+; GCN-NEXT:    v_writelane_b32 v22, s21, 61
+; GCN-NEXT:    v_writelane_b32 v22, s22, 62
+; GCN-NEXT:    v_writelane_b32 v22, s23, 63
 ; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v22, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[24:25]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[6:7]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_waitcnt vmcnt(1)
-; GCN-NEXT:    v_writelane_b32 v0, s6, 0
-; GCN-NEXT:    v_writelane_b32 v0, s7, 1
+; GCN-NEXT:    ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GCN-NEXT:    v_writelane_b32 v22, s6, 0
+; GCN-NEXT:    v_writelane_b32 v22, s7, 1
 ; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v22, off, s[0:3], 0 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_mov_b64 exec, s[24:25]
 ; GCN-NEXT:    s_mov_b32 s5, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
@@ -125,88 +113,88 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
 ; GCN-NEXT:    s_cbranch_scc1 .LBB0_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
 ; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v23, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[24:25]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_readlane_b32 s4, v1, 0
-; GCN-NEXT:    v_readlane_b32 s5, v1, 1
-; GCN-NEXT:    v_readlane_b32 s6, v1, 2
-; GCN-NEXT:    v_readlane_b32 s7, v1, 3
-; GCN-NEXT:    v_readlane_b32 s8, v1, 4
-; GCN-NEXT:    v_readlane_b32 s9, v1, 5
-; GCN-NEXT:    v_readlane_b32 s10, v1, 6
-; GCN-NEXT:    v_readlane_b32 s11, v1, 7
-; GCN-NEXT:    v_readlane_b32 s12, v1, 8
-; GCN-NEXT:    v_readlane_b32 s13, v1, 9
-; GCN-NEXT:    v_readlane_b32 s14, v1, 10
-; GCN-NEXT:    v_readlane_b32 s15, v1, 11
-; GCN-NEXT:    v_readlane_b32 s16, v1, 12
-; GCN-NEXT:    v_readlane_b32 s17, v1, 13
-; GCN-NEXT:    v_readlane_b32 s18, v1, 14
-; GCN-NEXT:    v_readlane_b32 s19, v1, 15
+; GCN-NEXT:    v_readlane_b32 s4, v23, 0
+; GCN-NEXT:    v_readlane_b32 s5, v23, 1
+; GCN-NEXT:    v_readlane_b32 s6, v23, 2
+; GCN-NEXT:    v_readlane_b32 s7, v23, 3
+; GCN-NEXT:    v_readlane_b32 s8, v23, 4
+; GCN-NEXT:    v_readlane_b32 s9, v23, 5
+; GCN-NEXT:    v_readlane_b32 s10, v23, 6
+; GCN-NEXT:    v_readlane_b32 s11, v23, 7
+; GCN-NEXT:    v_readlane_b32 s12, v23, 8
+; GCN-NEXT:    v_readlane_b32 s13, v23, 9
+; GCN-NEXT:    v_readlane_b32 s14, v23, 10
+; GCN-NEXT:    v_readlane_b32 s15, v23, 11
+; GCN-NEXT:    v_readlane_b32 s16, v23, 12
+; GCN-NEXT:    v_readlane_b32 s17, v23, 13
+; GCN-NEXT:    v_readlane_b32 s18, v23, 14
+; GCN-NEXT:    v_readlane_b32 s19, v23, 15
 ; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v22, off, s[0:3], 0 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[24:25]
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s4, v1, 16
-; GCN-NEXT:    v_readlane_b32 s5, v1, 17
-; GCN-NEXT:    v_readlane_b32 s6, v1, 18
-; GCN-NEXT:    v_readlane_b32 s7, v1, 19
-; GCN-NEXT:    v_readlane_b32 s8, v1, 20
-; GCN-NEXT:    v_readlane_b32 s9, v1, 21
-; GCN-NEXT:    v_readlane_b32 s10, v1, 22
-; GCN-NEXT:    v_readlane_b32 s11, v1, 23
-; GCN-NEXT:    v_readlane_b32 s12, v1, 24
-; GCN-NEXT:    v_readlane_b32 s13, v1, 25
-; GCN-NEXT:    v_readlane_b32 s14, v1, 26
-; GCN-NEXT:    v_readlane_b32 s15, v1, 27
-; GCN-NEXT:    v_readlane_b32 s16, v1, 28
-; GCN-NEXT:    v_readlane_b32 s17, v1, 29
-; GCN-NEXT:    v_readlane_b32 s18, v1, 30
-; GCN-NEXT:    v_readlane_b32 s19, v1, 31
+; GCN-NEXT:    v_readlane_b32 s4, v23, 16
+; GCN-NEXT:    v_readlane_b32 s5, v23, 17
+; GCN-NEXT:    v_readlane_b32 s6, v23, 18
+; GCN-NEXT:    v_readlane_b32 s7, v23, 19
+; GCN-NEXT:    v_readlane_b32 s8, v23, 20
+; GCN-NEXT:    v_readlane_b32 s9, v23, 21
+; GCN-NEXT:    v_readlane_b32 s10, v23, 22
+; GCN-NEXT:    v_readlane_b32 s11, v23, 23
+; GCN-NEXT:    v_readlane_b32 s12, v23, 24
+; GCN-NEXT:    v_readlane_b32 s13, v23, 25
+; GCN-NEXT:    v_readlane_b32 s14, v23, 26
+; GCN-NEXT:    v_readlane_b32 s15, v23, 27
+; GCN-NEXT:    v_readlane_b32 s16, v23, 28
+; GCN-NEXT:    v_readlane_b32 s17, v23, 29
+; GCN-NEXT:    v_readlane_b32 s18, v23, 30
+; GCN-NEXT:    v_readlane_b32 s19, v23, 31
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s4, v1, 32
-; GCN-NEXT:    v_readlane_b32 s5, v1, 33
-; GCN-NEXT:    v_readlane_b32 s6, v1, 34
-; GCN-NEXT:    v_readlane_b32 s7, v1, 35
-; GCN-NEXT:    v_readlane_b32 s8, v1, 36
-; GCN-NEXT:    v_readlane_b32 s9, v1, 37
-; GCN-NEXT:    v_readlane_b32 s10, v1, 38
-; GCN-NEXT:    v_readlane_b32 s11, v1, 39
-; GCN-NEXT:    v_readlane_b32 s12, v1, 40
-; GCN-NEXT:    v_readlane_b32 s13, v1, 41
-; GCN-NEXT:    v_readlane_b32 s14, v1, 42
-; GCN-NEXT:    v_readlane_b32 s15, v1, 43
-; GCN-NEXT:    v_readlane_b32 s16, v1, 44
-; GCN-NEXT:    v_readlane_b32 s17, v1, 45
-; GCN-NEXT:    v_readlane_b32 s18, v1, 46
-; GCN-NEXT:    v_readlane_b32 s19, v1, 47
+; GCN-NEXT:    v_readlane_b32 s4, v23, 32
+; GCN-NEXT:    v_readlane_b32 s5, v23, 33
+; GCN-NEXT:    v_readlane_b32 s6, v23, 34
+; GCN-NEXT:    v_readlane_b32 s7, v23, 35
+; GCN-NEXT:    v_readlane_b32 s8, v23, 36
+; GCN-NEXT:    v_readlane_b32 s9, v23, 37
+; GCN-NEXT:    v_readlane_b32 s10, v23, 38
+; GCN-NEXT:    v_readlane_b32 s11, v23, 39
+; GCN-NEXT:    v_readlane_b32 s12, v23, 40
+; GCN-NEXT:    v_readlane_b32 s13, v23, 41
+; GCN-NEXT:    v_readlane_b32 s14, v23, 42
+; GCN-NEXT:    v_readlane_b32 s15, v23, 43
+; GCN-NEXT:    v_readlane_b32 s16, v23, 44
+; GCN-NEXT:    v_readlane_b32 s17, v23, 45
+; GCN-NEXT:    v_readlane_b32 s18, v23, 46
+; GCN-NEXT:    v_readlane_b32 s19, v23, 47
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s8, v1, 48
-; GCN-NEXT:    v_readlane_b32 s9, v1, 49
-; GCN-NEXT:    v_readlane_b32 s10, v1, 50
-; GCN-NEXT:    v_readlane_b32 s11, v1, 51
-; GCN-NEXT:    v_readlane_b32 s12, v1, 52
-; GCN-NEXT:    v_readlane_b32 s13, v1, 53
-; GCN-NEXT:    v_readlane_b32 s14, v1, 54
-; GCN-NEXT:    v_readlane_b32 s15, v1, 55
-; GCN-NEXT:    v_readlane_b32 s16, v1, 56
-; GCN-NEXT:    v_readlane_b32 s17, v1, 57
-; GCN-NEXT:    v_readlane_b32 s18, v1, 58
-; GCN-NEXT:    v_readlane_b32 s19, v1, 59
-; GCN-NEXT:    v_readlane_b32 s20, v1, 60
-; GCN-NEXT:    v_readlane_b32 s21, v1, 61
-; GCN-NEXT:    v_readlane_b32 s22, v1, 62
-; GCN-NEXT:    v_readlane_b32 s23, v1, 63
+; GCN-NEXT:    v_readlane_b32 s8, v23, 48
+; GCN-NEXT:    v_readlane_b32 s9, v23, 49
+; GCN-NEXT:    v_readlane_b32 s10, v23, 50
+; GCN-NEXT:    v_readlane_b32 s11, v23, 51
+; GCN-NEXT:    v_readlane_b32 s12, v23, 52
+; GCN-NEXT:    v_readlane_b32 s13, v23, 53
+; GCN-NEXT:    v_readlane_b32 s14, v23, 54
+; GCN-NEXT:    v_readlane_b32 s15, v23, 55
+; GCN-NEXT:    v_readlane_b32 s16, v23, 56
+; GCN-NEXT:    v_readlane_b32 s17, v23, 57
+; GCN-NEXT:    v_readlane_b32 s18, v23, 58
+; GCN-NEXT:    v_readlane_b32 s19, v23, 59
+; GCN-NEXT:    v_readlane_b32 s20, v23, 60
+; GCN-NEXT:    v_readlane_b32 s21, v23, 61
+; GCN-NEXT:    v_readlane_b32 s22, v23, 62
+; GCN-NEXT:    v_readlane_b32 s23, v23, 63
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_readlane_b32 s4, v0, 0
-; GCN-NEXT:    v_readlane_b32 s5, v0, 1
+; GCN-NEXT:    v_readlane_b32 s4, v22, 0
+; GCN-NEXT:    v_readlane_b32 s5, v22, 1
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[8:23]
 ; GCN-NEXT:    ;;#ASMEND
@@ -214,20 +202,11 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
 ; GCN-NEXT:    ; use s[4:5]
 ; GCN-NEXT:    ;;#ASMEND
 ; GCN-NEXT:  .LBB0_2: ; %ret
-; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[24:25]
-; GCN-NEXT:    s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[24:25]
-; GCN-NEXT:    ; kill: killed $vgpr1
-; GCN-NEXT:    ; kill: killed $vgpr0
 ; GCN-NEXT:    s_endpgm
   call void asm sideeffect "", "~{v[0:7]}" () #0
   call void asm sideeffect "", "~{v[8:15]}" () #0
   call void asm sideeffect "", "~{v[16:19]}"() #0
   call void asm sideeffect "", "~{v[20:21]}"() #0
-  call void asm sideeffect "", "~{v22}"() #0
 
   %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
   %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0

>From 103b2d2bcea1e28ef9b863d53fa5bedba0e212ac Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Tue, 11 Jun 2024 13:44:40 +0530
Subject: [PATCH 06/13] Removed the irrelevant test si-spill-sgpr-stack.ll.

---
 .../CodeGen/AMDGPU/si-spill-sgpr-stack.ll     | 75 -------------------
 1 file changed, 75 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll

diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
deleted file mode 100644
index afee1caa7ab4b..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s
-
-; XFAIL: *
-
-; Make sure this doesn't crash.
-; ALL-LABEL: {{^}}test:
-; ALL: s_mov_b32 s[[LO:[0-9]+]], SCRATCH_RSRC_DWORD0
-; ALL: s_mov_b32 s[[HI:[0-9]+]], 0xe80000
-
-; Make sure we are handling hazards correctly.
-; SGPR: v_mov_b32_e32 v0, vcc_lo
-; SGPR-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1
-; SGPR-NEXT: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 ; 4-byte Folded Reload
-; SGPR-NEXT: s_mov_b64 exec, [[EXEC_COPY]]
-; SGPR-NEXT: s_waitcnt vmcnt(0)
-; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 0
-; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 1
-; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 2
-; SGPR-NEXT: v_readlane_b32 s[[HI:[0-9]+]], [[VHI]], 3
-; SGPR-NEXT: s_or_saveexec_b64 s[100:101], -1
-; SGPR-NEXT: s_mov_b64 exec, s[100:101]
-; SGPR-NEXT: s_nop 2
-; SGPR-NEXT: buffer_store_dword v0, off, s[{{[0-9]+}}:[[HI]]], 0
-; SGPR-NEXT: ; kill: killed $vgpr1
-
-; ALL: s_endpgm
-define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
-  call void asm sideeffect "", "~{s[0:7]}" ()
-  call void asm sideeffect "", "~{s[8:15]}" ()
-  call void asm sideeffect "", "~{s[16:23]}" ()
-  call void asm sideeffect "", "~{s[24:31]}" ()
-  call void asm sideeffect "", "~{s[32:39]}" ()
-  call void asm sideeffect "", "~{s[40:47]}" ()
-  call void asm sideeffect "", "~{s[48:55]}" ()
-  call void asm sideeffect "", "~{s[56:63]}" ()
-  call void asm sideeffect "", "~{s[64:71]}" ()
-  call void asm sideeffect "", "~{s[72:79]}" ()
-  call void asm sideeffect "", "~{s[80:87]}" ()
-  call void asm sideeffect "", "~{s[88:95]}" ()
-  call void asm sideeffect "", "~{v[0:7]}" ()
-  call void asm sideeffect "", "~{v[8:15]}" ()
-  call void asm sideeffect "", "~{v[16:23]}" ()
-  call void asm sideeffect "", "~{v[24:31]}" ()
-  call void asm sideeffect "", "~{v[32:39]}" ()
-  call void asm sideeffect "", "~{v[40:47]}" ()
-  call void asm sideeffect "", "~{v[48:55]}" ()
-  call void asm sideeffect "", "~{v[56:63]}" ()
-  call void asm sideeffect "", "~{v[64:71]}" ()
-  call void asm sideeffect "", "~{v[72:79]}" ()
-  call void asm sideeffect "", "~{v[80:87]}" ()
-  call void asm sideeffect "", "~{v[88:95]}" ()
-  call void asm sideeffect "", "~{v[96:103]}" ()
-  call void asm sideeffect "", "~{v[104:111]}" ()
-  call void asm sideeffect "", "~{v[112:119]}" ()
-  call void asm sideeffect "", "~{v[120:127]}" ()
-  call void asm sideeffect "", "~{v[128:135]}" ()
-  call void asm sideeffect "", "~{v[136:143]}" ()
-  call void asm sideeffect "", "~{v[144:151]}" ()
-  call void asm sideeffect "", "~{v[152:159]}" ()
-  call void asm sideeffect "", "~{v[160:167]}" ()
-  call void asm sideeffect "", "~{v[168:175]}" ()
-  call void asm sideeffect "", "~{v[176:183]}" ()
-  call void asm sideeffect "", "~{v[184:191]}" ()
-  call void asm sideeffect "", "~{v[192:199]}" ()
-  call void asm sideeffect "", "~{v[200:207]}" ()
-  call void asm sideeffect "", "~{v[208:215]}" ()
-  call void asm sideeffect "", "~{v[216:223]}" ()
-  call void asm sideeffect "", "~{v[224:231]}" ()
-  call void asm sideeffect "", "~{v[232:239]}" ()
-  call void asm sideeffect "", "~{v[240:247]}" ()
-  call void asm sideeffect "", "~{v[248:255]}" ()
-
-  store i32 %in, ptr addrspace(1) %out
-  ret void
-}

>From f0fb4606c8198e56920c44e853001386001f7d02 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Tue, 11 Jun 2024 15:06:00 +0530
Subject: [PATCH 07/13] Added a negative test for wwm-regalloc error.

---
 llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll

diff --git a/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
new file mode 100644
index 0000000000000..e2c0bb2acb76c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
@@ -0,0 +1,17 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -stress-regalloc=3 -filetype=null %s 2>&1 | FileCheck %s
+
+; A negative test to capture the expected error when the VGPRs are insufficient for wwm-regalloc.
+
+; CHECK: error: can't find enough VGPRs for wwm-regalloc
+
+define amdgpu_kernel void @test() #0 {
+  call void asm sideeffect "", "~{v[0:1]}" ()
+  %val0 = call i32 asm sideeffect "; def $0", "=s" ()
+  %val1 = call i32 asm sideeffect "; def $0", "=s" ()
+  store i32 10, ptr addrspace(1) null
+  call void asm sideeffect "; use $0", "s"(i32 %val0)
+  call void asm sideeffect "; use $0", "s"(i32 %val1)
+  ret void
+}
+
+attributes #0 = { nounwind "amdgpu-num-vgpr"="2"}
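
For context, here is a minimal sketch of how a backend pass can raise this kind of diagnostic so that a `not llc ... 2>&1 | FileCheck` RUN line like the one above can match it. This is only an illustration that assumes the LLVMContext::emitError path; the reporting mechanism actually used by this series may differ, and reportWwmRegallocFailure is a hypothetical helper name.

// Illustrative only: raise an "error: ..." diagnostic from a MachineFunction
// pass. LLVMContext::emitError prints the message with an "error: " prefix
// and should make the llc invocation fail, which is what the RUN line checks.
// reportWwmRegallocFailure is a hypothetical name, not part of the patch.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static void reportWwmRegallocFailure(MachineFunction &MF) {
  MF.getFunction().getContext().emitError(
      "can't find enough VGPRs for wwm-regalloc");
}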

>From 5af279fb0a517d7ae9eff4203cac30f4bdec94a0 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Tue, 11 Jun 2024 15:12:17 +0530
Subject: [PATCH 08/13] Improved a comment.

---
 llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp
index 9066545138baa..3879ddeb74eda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp
@@ -80,8 +80,9 @@ bool AMDGPUReserveWWMRegs::runOnMachineFunction(MachineFunction &MF) {
     }
   }
 
-  // Reset the renamable flag for MOs involving wwm-regs to get rid of the MIR
-  // Verifier error.
+  // The renamable flag can't be set for reserved registers. Reset the flag
+  // for MOs involving wwm-regs since they will be reserved during the
+  // vgpr-regalloc pipeline.
   for (MachineBasicBlock &MBB : MF) {
     for (MachineInstr &MI : MBB) {
       for (MachineOperand &MO : MI.operands()) {
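
The hunk above only shows the top of the operand walk, so here is a self-contained sketch of the behaviour the new comment describes (clearing the renamable flag on operands that use wwm registers). It assumes the reserved wwm registers are reachable through SIMachineFunctionInfo::getWWMReservedRegs(), as used elsewhere in this series, and is not the patch itself.

// Sketch only: drop the renamable flag from physical register operands that
// refer to wwm registers, since the MIR verifier rejects renamable operands
// on reserved registers.
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"

using namespace llvm;

static void clearRenamableOnWwmRegs(MachineFunction &MF) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      for (MachineOperand &MO : MI.operands()) {
        // setIsRenamable() is only valid on physical register operands.
        if (!MO.isReg() || !MO.getReg().isPhysical())
          continue;
        if (llvm::is_contained(MFI->getWWMReservedRegs(), MO.getReg()))
          MO.setIsRenamable(false);
      }
}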

>From 9c743f96e432843b121cbff4a1c9a3f85f965f89 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Tue, 11 Jun 2024 15:17:46 +0530
Subject: [PATCH 09/13] Fixed the test sgpr-spill-incorrect-fi-bookkeeping-bug.ll.

---
 ...sgpr-spill-incorrect-fi-bookkeeping-bug.ll | 357 ------------------
 1 file changed, 357 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
index 0af4b0c6269e7..42da8d7af10df 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
@@ -6,363 +6,6 @@
 
 ; CHECK-LABEL: {{^}}kernel0:
 define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
-; CHECK-LABEL: kernel0:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[34:35]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[0:3]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[24:31]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[8:23]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[4:5]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[36:39]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:47]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
-; CHECK-NEXT:    ; implicit-def: $vgpr23 : SGPR spill to VGPR lane
-; CHECK-NEXT:    v_writelane_b32 v22, s40, 0
-; CHECK-NEXT:    v_writelane_b32 v22, s41, 1
-; CHECK-NEXT:    v_writelane_b32 v22, s42, 2
-; CHECK-NEXT:    v_writelane_b32 v22, s43, 3
-; CHECK-NEXT:    v_writelane_b32 v22, s44, 4
-; CHECK-NEXT:    v_writelane_b32 v22, s45, 5
-; CHECK-NEXT:    v_writelane_b32 v22, s46, 6
-; CHECK-NEXT:    v_writelane_b32 v22, s47, 7
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:55]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v22, s40, 8
-; CHECK-NEXT:    v_writelane_b32 v22, s41, 9
-; CHECK-NEXT:    v_writelane_b32 v22, s42, 10
-; CHECK-NEXT:    v_writelane_b32 v22, s43, 11
-; CHECK-NEXT:    v_writelane_b32 v22, s44, 12
-; CHECK-NEXT:    v_writelane_b32 v22, s45, 13
-; CHECK-NEXT:    v_writelane_b32 v22, s46, 14
-; CHECK-NEXT:    v_writelane_b32 v22, s47, 15
-; CHECK-NEXT:    v_writelane_b32 v22, s48, 16
-; CHECK-NEXT:    v_writelane_b32 v22, s49, 17
-; CHECK-NEXT:    v_writelane_b32 v22, s50, 18
-; CHECK-NEXT:    v_writelane_b32 v22, s51, 19
-; CHECK-NEXT:    v_writelane_b32 v22, s52, 20
-; CHECK-NEXT:    v_writelane_b32 v22, s53, 21
-; CHECK-NEXT:    v_writelane_b32 v22, s54, 22
-; CHECK-NEXT:    v_writelane_b32 v22, s55, 23
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[6:7]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:43]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v22, s40, 24
-; CHECK-NEXT:    v_writelane_b32 v22, s41, 25
-; CHECK-NEXT:    v_writelane_b32 v22, s42, 26
-; CHECK-NEXT:    v_writelane_b32 v22, s43, 27
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:47]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v22, s40, 28
-; CHECK-NEXT:    v_writelane_b32 v22, s41, 29
-; CHECK-NEXT:    v_writelane_b32 v22, s42, 30
-; CHECK-NEXT:    v_writelane_b32 v22, s43, 31
-; CHECK-NEXT:    v_writelane_b32 v22, s44, 32
-; CHECK-NEXT:    v_writelane_b32 v22, s45, 33
-; CHECK-NEXT:    v_writelane_b32 v22, s46, 34
-; CHECK-NEXT:    v_writelane_b32 v22, s47, 35
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:55]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v22, s40, 36
-; CHECK-NEXT:    v_writelane_b32 v22, s41, 37
-; CHECK-NEXT:    v_writelane_b32 v22, s42, 38
-; CHECK-NEXT:    v_writelane_b32 v22, s43, 39
-; CHECK-NEXT:    v_writelane_b32 v22, s44, 40
-; CHECK-NEXT:    v_writelane_b32 v22, s45, 41
-; CHECK-NEXT:    v_writelane_b32 v22, s46, 42
-; CHECK-NEXT:    v_writelane_b32 v22, s47, 43
-; CHECK-NEXT:    v_writelane_b32 v22, s48, 44
-; CHECK-NEXT:    v_writelane_b32 v22, s49, 45
-; CHECK-NEXT:    v_writelane_b32 v22, s50, 46
-; CHECK-NEXT:    v_writelane_b32 v22, s51, 47
-; CHECK-NEXT:    v_writelane_b32 v22, s52, 48
-; CHECK-NEXT:    v_writelane_b32 v22, s53, 49
-; CHECK-NEXT:    v_writelane_b32 v22, s54, 50
-; CHECK-NEXT:    v_writelane_b32 v22, s55, 51
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:41]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v22, s40, 52
-; CHECK-NEXT:    v_writelane_b32 v22, s41, 53
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:43]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v22, s40, 54
-; CHECK-NEXT:    v_writelane_b32 v22, s41, 55
-; CHECK-NEXT:    v_writelane_b32 v22, s42, 56
-; CHECK-NEXT:    v_writelane_b32 v22, s43, 57
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:47]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v22, s40, 58
-; CHECK-NEXT:    v_writelane_b32 v22, s41, 59
-; CHECK-NEXT:    v_writelane_b32 v22, s42, 60
-; CHECK-NEXT:    v_writelane_b32 v22, s43, 61
-; CHECK-NEXT:    v_writelane_b32 v22, s44, 62
-; CHECK-NEXT:    v_writelane_b32 v23, s46, 0
-; CHECK-NEXT:    v_writelane_b32 v22, s45, 63
-; CHECK-NEXT:    v_writelane_b32 v23, s47, 1
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:55]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s40, 2
-; CHECK-NEXT:    v_writelane_b32 v23, s41, 3
-; CHECK-NEXT:    v_writelane_b32 v23, s42, 4
-; CHECK-NEXT:    v_writelane_b32 v23, s43, 5
-; CHECK-NEXT:    v_writelane_b32 v23, s44, 6
-; CHECK-NEXT:    v_writelane_b32 v23, s45, 7
-; CHECK-NEXT:    v_writelane_b32 v23, s46, 8
-; CHECK-NEXT:    v_writelane_b32 v23, s47, 9
-; CHECK-NEXT:    v_writelane_b32 v23, s48, 10
-; CHECK-NEXT:    v_writelane_b32 v23, s49, 11
-; CHECK-NEXT:    v_writelane_b32 v23, s50, 12
-; CHECK-NEXT:    v_writelane_b32 v23, s51, 13
-; CHECK-NEXT:    v_writelane_b32 v23, s52, 14
-; CHECK-NEXT:    v_writelane_b32 v23, s53, 15
-; CHECK-NEXT:    v_writelane_b32 v23, s54, 16
-; CHECK-NEXT:    v_writelane_b32 v23, s55, 17
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:41]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s40, 18
-; CHECK-NEXT:    v_writelane_b32 v23, s41, 19
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:43]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s40, 20
-; CHECK-NEXT:    v_writelane_b32 v23, s41, 21
-; CHECK-NEXT:    v_writelane_b32 v23, s42, 22
-; CHECK-NEXT:    v_writelane_b32 v23, s43, 23
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:47]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s40, 24
-; CHECK-NEXT:    v_writelane_b32 v23, s41, 25
-; CHECK-NEXT:    v_writelane_b32 v23, s42, 26
-; CHECK-NEXT:    v_writelane_b32 v23, s43, 27
-; CHECK-NEXT:    v_writelane_b32 v23, s44, 28
-; CHECK-NEXT:    v_writelane_b32 v23, s45, 29
-; CHECK-NEXT:    v_writelane_b32 v23, s46, 30
-; CHECK-NEXT:    v_writelane_b32 v23, s47, 31
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; def s[40:55]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_writelane_b32 v23, s40, 32
-; CHECK-NEXT:    v_writelane_b32 v23, s41, 33
-; CHECK-NEXT:    v_writelane_b32 v23, s42, 34
-; CHECK-NEXT:    v_writelane_b32 v23, s43, 35
-; CHECK-NEXT:    v_writelane_b32 v23, s44, 36
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[34:35]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:3]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[24:31]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[8:23]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s8, v22, 0
-; CHECK-NEXT:    v_writelane_b32 v23, s45, 37
-; CHECK-NEXT:    v_readlane_b32 s9, v22, 1
-; CHECK-NEXT:    v_readlane_b32 s10, v22, 2
-; CHECK-NEXT:    v_readlane_b32 s11, v22, 3
-; CHECK-NEXT:    v_readlane_b32 s12, v22, 4
-; CHECK-NEXT:    v_readlane_b32 s13, v22, 5
-; CHECK-NEXT:    v_readlane_b32 s14, v22, 6
-; CHECK-NEXT:    v_readlane_b32 s15, v22, 7
-; CHECK-NEXT:    v_readlane_b32 s0, v22, 24
-; CHECK-NEXT:    v_writelane_b32 v23, s46, 38
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[4:5]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[36:39]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[8:15]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s8, v22, 8
-; CHECK-NEXT:    v_readlane_b32 s1, v22, 25
-; CHECK-NEXT:    v_readlane_b32 s2, v22, 26
-; CHECK-NEXT:    v_readlane_b32 s3, v22, 27
-; CHECK-NEXT:    v_writelane_b32 v23, s47, 39
-; CHECK-NEXT:    v_readlane_b32 s9, v22, 9
-; CHECK-NEXT:    v_readlane_b32 s10, v22, 10
-; CHECK-NEXT:    v_readlane_b32 s11, v22, 11
-; CHECK-NEXT:    v_readlane_b32 s12, v22, 12
-; CHECK-NEXT:    v_readlane_b32 s13, v22, 13
-; CHECK-NEXT:    v_readlane_b32 s14, v22, 14
-; CHECK-NEXT:    v_readlane_b32 s15, v22, 15
-; CHECK-NEXT:    v_readlane_b32 s16, v22, 16
-; CHECK-NEXT:    v_readlane_b32 s17, v22, 17
-; CHECK-NEXT:    v_readlane_b32 s18, v22, 18
-; CHECK-NEXT:    v_readlane_b32 s19, v22, 19
-; CHECK-NEXT:    v_readlane_b32 s20, v22, 20
-; CHECK-NEXT:    v_readlane_b32 s21, v22, 21
-; CHECK-NEXT:    v_readlane_b32 s22, v22, 22
-; CHECK-NEXT:    v_readlane_b32 s23, v22, 23
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[8:23]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[6:7]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:3]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v22, 28
-; CHECK-NEXT:    v_writelane_b32 v23, s48, 40
-; CHECK-NEXT:    v_readlane_b32 s1, v22, 29
-; CHECK-NEXT:    v_readlane_b32 s2, v22, 30
-; CHECK-NEXT:    v_readlane_b32 s3, v22, 31
-; CHECK-NEXT:    v_readlane_b32 s4, v22, 32
-; CHECK-NEXT:    v_readlane_b32 s5, v22, 33
-; CHECK-NEXT:    v_readlane_b32 s6, v22, 34
-; CHECK-NEXT:    v_readlane_b32 s7, v22, 35
-; CHECK-NEXT:    v_writelane_b32 v23, s49, 41
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:7]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v22, 36
-; CHECK-NEXT:    v_writelane_b32 v23, s50, 42
-; CHECK-NEXT:    v_readlane_b32 s1, v22, 37
-; CHECK-NEXT:    v_writelane_b32 v23, s51, 43
-; CHECK-NEXT:    v_readlane_b32 s2, v22, 38
-; CHECK-NEXT:    v_readlane_b32 s3, v22, 39
-; CHECK-NEXT:    v_readlane_b32 s4, v22, 40
-; CHECK-NEXT:    v_readlane_b32 s5, v22, 41
-; CHECK-NEXT:    v_readlane_b32 s6, v22, 42
-; CHECK-NEXT:    v_readlane_b32 s7, v22, 43
-; CHECK-NEXT:    v_readlane_b32 s8, v22, 44
-; CHECK-NEXT:    v_readlane_b32 s9, v22, 45
-; CHECK-NEXT:    v_readlane_b32 s10, v22, 46
-; CHECK-NEXT:    v_readlane_b32 s11, v22, 47
-; CHECK-NEXT:    v_readlane_b32 s12, v22, 48
-; CHECK-NEXT:    v_readlane_b32 s13, v22, 49
-; CHECK-NEXT:    v_readlane_b32 s14, v22, 50
-; CHECK-NEXT:    v_readlane_b32 s15, v22, 51
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:15]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v22, 52
-; CHECK-NEXT:    v_writelane_b32 v23, s52, 44
-; CHECK-NEXT:    v_readlane_b32 s1, v22, 53
-; CHECK-NEXT:    v_writelane_b32 v23, s53, 45
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:1]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v22, 54
-; CHECK-NEXT:    v_writelane_b32 v23, s54, 46
-; CHECK-NEXT:    v_readlane_b32 s1, v22, 55
-; CHECK-NEXT:    v_readlane_b32 s2, v22, 56
-; CHECK-NEXT:    v_readlane_b32 s3, v22, 57
-; CHECK-NEXT:    v_writelane_b32 v23, s55, 47
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:3]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v22, 58
-; CHECK-NEXT:    v_readlane_b32 s1, v22, 59
-; CHECK-NEXT:    v_readlane_b32 s2, v22, 60
-; CHECK-NEXT:    v_readlane_b32 s3, v22, 61
-; CHECK-NEXT:    v_readlane_b32 s4, v22, 62
-; CHECK-NEXT:    v_readlane_b32 s5, v22, 63
-; CHECK-NEXT:    v_readlane_b32 s6, v23, 0
-; CHECK-NEXT:    v_readlane_b32 s7, v23, 1
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:7]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 2
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 3
-; CHECK-NEXT:    v_readlane_b32 s2, v23, 4
-; CHECK-NEXT:    v_readlane_b32 s3, v23, 5
-; CHECK-NEXT:    v_readlane_b32 s4, v23, 6
-; CHECK-NEXT:    v_readlane_b32 s5, v23, 7
-; CHECK-NEXT:    v_readlane_b32 s6, v23, 8
-; CHECK-NEXT:    v_readlane_b32 s7, v23, 9
-; CHECK-NEXT:    v_readlane_b32 s8, v23, 10
-; CHECK-NEXT:    v_readlane_b32 s9, v23, 11
-; CHECK-NEXT:    v_readlane_b32 s10, v23, 12
-; CHECK-NEXT:    v_readlane_b32 s11, v23, 13
-; CHECK-NEXT:    v_readlane_b32 s12, v23, 14
-; CHECK-NEXT:    v_readlane_b32 s13, v23, 15
-; CHECK-NEXT:    v_readlane_b32 s14, v23, 16
-; CHECK-NEXT:    v_readlane_b32 s15, v23, 17
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:15]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 18
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 19
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:1]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 20
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 21
-; CHECK-NEXT:    v_readlane_b32 s2, v23, 22
-; CHECK-NEXT:    v_readlane_b32 s3, v23, 23
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:3]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 24
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 25
-; CHECK-NEXT:    v_readlane_b32 s2, v23, 26
-; CHECK-NEXT:    v_readlane_b32 s3, v23, 27
-; CHECK-NEXT:    v_readlane_b32 s4, v23, 28
-; CHECK-NEXT:    v_readlane_b32 s5, v23, 29
-; CHECK-NEXT:    v_readlane_b32 s6, v23, 30
-; CHECK-NEXT:    v_readlane_b32 s7, v23, 31
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:7]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_readlane_b32 s0, v23, 32
-; CHECK-NEXT:    v_readlane_b32 s1, v23, 33
-; CHECK-NEXT:    v_readlane_b32 s2, v23, 34
-; CHECK-NEXT:    v_readlane_b32 s3, v23, 35
-; CHECK-NEXT:    v_readlane_b32 s4, v23, 36
-; CHECK-NEXT:    v_readlane_b32 s5, v23, 37
-; CHECK-NEXT:    v_readlane_b32 s6, v23, 38
-; CHECK-NEXT:    v_readlane_b32 s7, v23, 39
-; CHECK-NEXT:    v_readlane_b32 s8, v23, 40
-; CHECK-NEXT:    v_readlane_b32 s9, v23, 41
-; CHECK-NEXT:    v_readlane_b32 s10, v23, 42
-; CHECK-NEXT:    v_readlane_b32 s11, v23, 43
-; CHECK-NEXT:    v_readlane_b32 s12, v23, 44
-; CHECK-NEXT:    v_readlane_b32 s13, v23, 45
-; CHECK-NEXT:    v_readlane_b32 s14, v23, 46
-; CHECK-NEXT:    v_readlane_b32 s15, v23, 47
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; use s[0:15]
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    s_endpgm
   call void asm sideeffect "", "~{v[0:7]}" () #0
   call void asm sideeffect "", "~{v[8:15]}" () #0
   call void asm sideeffect "", "~{v[16:19]}"() #0

>From 4c9596a91426d662fef8385dbde7a7aa49d3ea49 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Wed, 12 Jun 2024 18:31:12 +0530
Subject: [PATCH 10/13] Avoided the function attribute and the -mattr option.

---
 .../test/CodeGen/AMDGPU/wwm-regalloc-error.ll | 48 ++++++++++++++++---
 1 file changed, 42 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
index e2c0bb2acb76c..becafa4708ee7 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
@@ -1,17 +1,53 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -stress-regalloc=3 -filetype=null %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stress-regalloc=2 -filetype=null %s 2>&1 | FileCheck %s
 
 ; A negative test to capture the expected error when the VGPRs are insufficient for wwm-regalloc.
 
 ; CHECK: error: can't find enough VGPRs for wwm-regalloc
 
-define amdgpu_kernel void @test() #0 {
-  call void asm sideeffect "", "~{v[0:1]}" ()
+define amdgpu_kernel void @test(i32 %in) {
+entry:
+  call void asm sideeffect "", "~{v[0:7]}" ()
+  call void asm sideeffect "", "~{v[8:15]}" ()
+  call void asm sideeffect "", "~{v[16:23]}" ()
+  call void asm sideeffect "", "~{v[24:31]}" ()
+  call void asm sideeffect "", "~{v[32:39]}" ()
+  call void asm sideeffect "", "~{v[40:47]}" ()
+  call void asm sideeffect "", "~{v[48:55]}" ()
+  call void asm sideeffect "", "~{v[56:63]}" ()
+  call void asm sideeffect "", "~{v[64:71]}" ()
+  call void asm sideeffect "", "~{v[72:79]}" ()
+  call void asm sideeffect "", "~{v[80:87]}" ()
+  call void asm sideeffect "", "~{v[88:95]}" ()
+  call void asm sideeffect "", "~{v[96:103]}" ()
+  call void asm sideeffect "", "~{v[104:111]}" ()
+  call void asm sideeffect "", "~{v[112:119]}" ()
+  call void asm sideeffect "", "~{v[120:127]}" ()
+  call void asm sideeffect "", "~{v[128:135]}" ()
+  call void asm sideeffect "", "~{v[136:143]}" ()
+  call void asm sideeffect "", "~{v[144:151]}" ()
+  call void asm sideeffect "", "~{v[152:159]}" ()
+  call void asm sideeffect "", "~{v[160:167]}" ()
+  call void asm sideeffect "", "~{v[168:175]}" ()
+  call void asm sideeffect "", "~{v[176:183]}" ()
+  call void asm sideeffect "", "~{v[184:191]}" ()
+  call void asm sideeffect "", "~{v[192:199]}" ()
+  call void asm sideeffect "", "~{v[200:207]}" ()
+  call void asm sideeffect "", "~{v[208:215]}" ()
+  call void asm sideeffect "", "~{v[216:223]}" ()
+  call void asm sideeffect "", "~{v[224:231]}" ()
+  call void asm sideeffect "", "~{v[232:239]}" ()
+  call void asm sideeffect "", "~{v[240:247]}" ()
+  call void asm sideeffect "", "~{v[248:255]}" ()
   %val0 = call i32 asm sideeffect "; def $0", "=s" ()
   %val1 = call i32 asm sideeffect "; def $0", "=s" ()
-  store i32 10, ptr addrspace(1) null
+  %val2 = call i32 asm sideeffect "; def $0", "=s" ()
+  %cmp = icmp eq i32 %in, 0
+  br i1 %cmp, label %bb0, label %ret
+bb0:
   call void asm sideeffect "; use $0", "s"(i32 %val0)
   call void asm sideeffect "; use $0", "s"(i32 %val1)
+  call void asm sideeffect "; use $0", "s"(i32 %val2)
+  br label %ret
+ret:
   ret void
 }
-
-attributes #0 = { nounwind "amdgpu-num-vgpr"="2"}

>From a864d6c525a576a9dec3e7b296e731fcc92d01e6 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Thu, 13 Jun 2024 13:31:42 +0530
Subject: [PATCH 11/13] Reduced the clobbered VGPR list.

---
 .../test/CodeGen/AMDGPU/wwm-regalloc-error.ll | 24 -------------------
 1 file changed, 24 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
index becafa4708ee7..145f1e483cd99 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
@@ -14,30 +14,6 @@ entry:
   call void asm sideeffect "", "~{v[40:47]}" ()
   call void asm sideeffect "", "~{v[48:55]}" ()
   call void asm sideeffect "", "~{v[56:63]}" ()
-  call void asm sideeffect "", "~{v[64:71]}" ()
-  call void asm sideeffect "", "~{v[72:79]}" ()
-  call void asm sideeffect "", "~{v[80:87]}" ()
-  call void asm sideeffect "", "~{v[88:95]}" ()
-  call void asm sideeffect "", "~{v[96:103]}" ()
-  call void asm sideeffect "", "~{v[104:111]}" ()
-  call void asm sideeffect "", "~{v[112:119]}" ()
-  call void asm sideeffect "", "~{v[120:127]}" ()
-  call void asm sideeffect "", "~{v[128:135]}" ()
-  call void asm sideeffect "", "~{v[136:143]}" ()
-  call void asm sideeffect "", "~{v[144:151]}" ()
-  call void asm sideeffect "", "~{v[152:159]}" ()
-  call void asm sideeffect "", "~{v[160:167]}" ()
-  call void asm sideeffect "", "~{v[168:175]}" ()
-  call void asm sideeffect "", "~{v[176:183]}" ()
-  call void asm sideeffect "", "~{v[184:191]}" ()
-  call void asm sideeffect "", "~{v[192:199]}" ()
-  call void asm sideeffect "", "~{v[200:207]}" ()
-  call void asm sideeffect "", "~{v[208:215]}" ()
-  call void asm sideeffect "", "~{v[216:223]}" ()
-  call void asm sideeffect "", "~{v[224:231]}" ()
-  call void asm sideeffect "", "~{v[232:239]}" ()
-  call void asm sideeffect "", "~{v[240:247]}" ()
-  call void asm sideeffect "", "~{v[248:255]}" ()
   %val0 = call i32 asm sideeffect "; def $0", "=s" ()
   %val1 = call i32 asm sideeffect "; def $0", "=s" ()
   %val2 = call i32 asm sideeffect "; def $0", "=s" ()

>From 5e621d1b15d00092dbfa911ccaf8619d33ca4eb5 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Fri, 14 Jun 2024 10:31:25 +0530
Subject: [PATCH 12/13] reserved the shifted wwm-regs and stopped marking them
 as BB LiveIns.

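The idea, as a minimal C++ sketch pieced together from the hunks below (not a
standalone snippet; Reg, NewReg, MRI and TRI are the values already in scope in
shiftWwmVGPRsToLowestRange()): once a WWM VGPR has been remapped to NewReg, the
shifted register is reserved in MachineRegisterInfo,

  // SIMachineFunctionInfo::shiftWwmVGPRsToLowestRange(), after remapping:
  WWMReservedRegs.remove(Reg);
  WWMReservedRegs.insert(NewReg);
  // new: reserve the shifted reg so per-block live-in bookkeeping is unneeded
  MRI.reserveReg(NewReg, TRI);

which lets SIFrameLowering drop the addWwmRegBBLiveIn() helper that previously
walked every MachineBasicBlock calling addLiveIn()/sortUniqueLiveIns() for each
WWM reserved register. The MIR test updates below mostly reflect the resulting
changes in block live-in lists.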
---
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp    |  17 +--
 .../Target/AMDGPU/SIMachineFunctionInfo.cpp   |   1 +
 .../GlobalISel/llvm.amdgcn.set.inactive.ll    |   2 +-
 .../AMDGPU/accvgpr-spill-scc-clobber.mir      |  48 ++++----
 .../atomic_optimizations_pixelshader.ll       |  15 ++-
 .../test/CodeGen/AMDGPU/branch-relax-spill.ll |   4 +-
 .../AMDGPU/csr-sgpr-spill-live-ins.mir        |   6 +-
 .../CodeGen/AMDGPU/fold-reload-into-exec.mir  |  52 ++++----
 .../CodeGen/AMDGPU/fold-reload-into-m0.mir    |  16 +--
 llvm/test/CodeGen/AMDGPU/frame-index.mir      |   2 +-
 .../AMDGPU/global_atomics_scan_fadd.ll        |  10 +-
 .../AMDGPU/global_atomics_scan_fmax.ll        |   6 +-
 .../AMDGPU/global_atomics_scan_fmin.ll        |   6 +-
 .../AMDGPU/global_atomics_scan_fsub.ll        |  10 +-
 .../CodeGen/AMDGPU/insert-waitcnts-crash.ll   |  14 +--
 .../AMDGPU/pei-amdgpu-cs-chain-preserve.mir   |   6 +-
 .../CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir    |   6 +-
 .../sgpr-spill-dead-frame-in-dbg-value.mir    |   3 -
 .../AMDGPU/sgpr-spill-overlap-wwm-reserve.mir |  18 +--
 .../AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir |  24 ++--
 .../AMDGPU/spill-sgpr-used-for-exec-copy.mir  |   2 +-
 ...d-op-for-wwm-scratch-reg-spill-restore.mir |  18 +--
 .../CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir | 112 +++++++++---------
 23 files changed, 180 insertions(+), 218 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 15afe387731dc..38afb67e525d2 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1555,17 +1555,6 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
   }
 }
 
-// Mark all WWM VGPRs as BB LiveIns.
-static void addWwmRegBBLiveIn(MachineFunction &MF) {
-  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  for (MachineBasicBlock &MBB : MF) {
-    for (Register Reg : MFI->getWWMReservedRegs())
-      MBB.addLiveIn(Reg);
-
-    MBB.sortUniqueLiveIns();
-  }
-}
-
 // Only report VGPRs to generic code.
 void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedVGPRs,
@@ -1619,10 +1608,8 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   sort(SortedWWMVGPRs, std::greater<Register>());
   MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
 
-  if (MFI->isEntryFunction()) {
-    addWwmRegBBLiveIn(MF);
+  if (MFI->isEntryFunction())
     return;
-  }
 
   // Remove any VGPRs used in the return value because these do not need to be saved.
   // This prevents CSR restore from clobbering return VGPRs.
@@ -1652,8 +1639,6 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // allow the default insertion to handle them.
   for (auto &Reg : MFI->getWWMSpills())
     SavedVGPRs.reset(Reg.first);
-
-  addWwmRegBBLiveIn(MF);
 }
 
 void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a1415b94ae7c7..f850116f18416 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -333,6 +333,7 @@ void SIMachineFunctionInfo::shiftWwmVGPRsToLowestRange(
     WWMVGPRs[I] = NewReg;
     WWMReservedRegs.remove(Reg);
     WWMReservedRegs.insert(NewReg);
+    MRI.reserveReg(NewReg, TRI);
 
     // Replace the register in SpillPhysVGPRs. This is needed to look for free
     // lanes while spilling special SGPRs like FP, BP, etc. during PEI.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
index e5f3b773b2771..cbee039df7fd0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
@@ -50,8 +50,8 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
 ; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_lg_u32 s2, 56
-; GCN-NEXT:    v_mov_b32_e32 v0, s3
 ; GCN-NEXT:    s_cselect_b32 s4, 1, 0
+; GCN-NEXT:    v_mov_b32_e32 v0, s3
 ; GCN-NEXT:    s_not_b64 exec, exec
 ; GCN-NEXT:    v_mov_b32_e32 v0, 42
 ; GCN-NEXT:    s_not_b64 exec, exec
diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
index 9794130d2b000..c91b686697b9d 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
@@ -20,7 +20,7 @@ body:             |
   ; GFX908-LABEL: name: agpr32_restore_clobber_scc
   ; GFX908: bb.0:
   ; GFX908-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -36,7 +36,7 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
-  ; GFX908-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
@@ -514,7 +514,7 @@ body:             |
   ; GFX908-FLATSCR-LABEL: name: agpr32_restore_clobber_scc
   ; GFX908-FLATSCR: bb.0:
   ; GFX908-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -531,7 +531,7 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
-  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
@@ -1038,7 +1038,7 @@ body:             |
   ; GFX908-LABEL: name: agpr64_restore_clobber_scc
   ; GFX908: bb.0:
   ; GFX908-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -1056,7 +1056,7 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
-  ; GFX908-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
@@ -1535,7 +1535,7 @@ body:             |
   ; GFX908-FLATSCR-LABEL: name: agpr64_restore_clobber_scc
   ; GFX908-FLATSCR: bb.0:
   ; GFX908-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -1554,7 +1554,7 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
-  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
@@ -2061,7 +2061,7 @@ body:             |
   ; GFX908-LABEL: name: agpr96_restore_clobber_scc
   ; GFX908: bb.0:
   ; GFX908-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -2081,7 +2081,7 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
-  ; GFX908-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
@@ -2561,7 +2561,7 @@ body:             |
   ; GFX908-FLATSCR-LABEL: name: agpr96_restore_clobber_scc
   ; GFX908-FLATSCR: bb.0:
   ; GFX908-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -2582,7 +2582,7 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
-  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
@@ -3089,7 +3089,7 @@ body:             |
   ; GFX908-LABEL: name: agpr32_save_clobber_scc
   ; GFX908: bb.0:
   ; GFX908-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-NEXT:   liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -3105,7 +3105,7 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
-  ; GFX908-NEXT:   liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
@@ -3583,7 +3583,7 @@ body:             |
   ; GFX908-FLATSCR-LABEL: name: agpr32_save_clobber_scc
   ; GFX908-FLATSCR: bb.0:
   ; GFX908-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-FLATSCR-NEXT:   liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -3600,7 +3600,7 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
-  ; GFX908-FLATSCR-NEXT:   liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
@@ -4106,7 +4106,7 @@ body:             |
   ; GFX908-LABEL: name: agpr64_save_clobber_scc
   ; GFX908: bb.0:
   ; GFX908-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-NEXT:   liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -4124,7 +4124,7 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
-  ; GFX908-NEXT:   liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
@@ -4603,7 +4603,7 @@ body:             |
   ; GFX908-FLATSCR-LABEL: name: agpr64_save_clobber_scc
   ; GFX908-FLATSCR: bb.0:
   ; GFX908-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-FLATSCR-NEXT:   liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -4622,7 +4622,7 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
-  ; GFX908-FLATSCR-NEXT:   liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
@@ -5127,7 +5127,7 @@ body:             |
   ; GFX908-LABEL: name: agpr96_save_clobber_scc
   ; GFX908: bb.0:
   ; GFX908-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-NEXT:   liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -5147,7 +5147,7 @@ body:             |
   ; GFX908-NEXT:   S_NOP 0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
-  ; GFX908-NEXT:   liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
@@ -5627,7 +5627,7 @@ body:             |
   ; GFX908-FLATSCR-LABEL: name: agpr96_save_clobber_scc
   ; GFX908-FLATSCR: bb.0:
   ; GFX908-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX908-FLATSCR-NEXT:   liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -5648,7 +5648,7 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   S_NOP 0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.2:
-  ; GFX908-FLATSCR-NEXT:   liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_ENDPGM 0, amdgpu_allvgprs
   ;
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
index e7627c1dca6df..29704959fc176 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
@@ -276,8 +276,8 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX8-NEXT:    s_mov_b64 exec, s[10:11]
 ; GFX8-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX8-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX8-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
+; GFX8-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX8-NEXT:    s_not_b64 exec, exec
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX8-NEXT:    s_not_b64 exec, exec
@@ -333,8 +333,8 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_mov_b64 exec, s[10:11]
 ; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX9-NEXT:    v_mbcnt_hi_u32_b32 v3, exec_hi, v3
+; GFX9-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX9-NEXT:    s_not_b64 exec, exec
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_not_b64 exec, exec
@@ -410,8 +410,8 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
 ; GFX1064-NEXT:    s_or_saveexec_b64 s[10:11], -1
 ; GFX1064-NEXT:    v_readlane_b32 s12, v1, 63
-; GFX1064-NEXT:    v_writelane_b32 v3, s13, 32
 ; GFX1064-NEXT:    v_readlane_b32 s14, v1, 47
+; GFX1064-NEXT:    v_writelane_b32 v3, s13, 32
 ; GFX1064-NEXT:    s_mov_b64 exec, s[10:11]
 ; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
 ; GFX1064-NEXT:    s_or_saveexec_b64 s[10:11], -1
@@ -463,8 +463,8 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1032-NEXT:    v_mov_b32_e32 v2, v1
 ; GFX1032-NEXT:    v_permlanex16_b32 v2, v2, -1, -1
 ; GFX1032-NEXT:    v_add_nc_u32_dpp v1, v2, v1 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
-; GFX1032-NEXT:    v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
 ; GFX1032-NEXT:    v_readlane_b32 s11, v1, 31
+; GFX1032-NEXT:    v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
 ; GFX1032-NEXT:    v_readlane_b32 s10, v1, 15
 ; GFX1032-NEXT:    s_mov_b32 exec_lo, s9
 ; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
@@ -536,15 +536,14 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1164-NEXT:    v_mbcnt_lo_u32_b32 v0, exec_lo, 0
 ; GFX1164-NEXT:    s_or_saveexec_b64 s[10:11], -1
 ; GFX1164-NEXT:    v_readlane_b32 s12, v1, 63
-; GFX1164-NEXT:    v_writelane_b32 v3, s13, 32
 ; GFX1164-NEXT:    v_readlane_b32 s14, v1, 47
+; GFX1164-NEXT:    v_writelane_b32 v3, s13, 32
 ; GFX1164-NEXT:    s_mov_b64 exec, s[10:11]
-; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, exec_hi, v0
 ; GFX1164-NEXT:    s_or_saveexec_b64 s[10:11], -1
 ; GFX1164-NEXT:    v_writelane_b32 v3, s14, 48
 ; GFX1164-NEXT:    s_mov_b64 exec, s[10:11]
-; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX1164-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX1164-NEXT:    ; implicit-def: $vgpr0
 ; GFX1164-NEXT:    s_and_saveexec_b64 s[10:11], vcc
@@ -599,8 +598,8 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1132-NEXT:    v_permlanex16_b32 v2, v2, -1, -1
 ; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1132-NEXT:    v_add_nc_u32_dpp v1, v2, v1 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
-; GFX1132-NEXT:    v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
 ; GFX1132-NEXT:    v_readlane_b32 s11, v1, 31
+; GFX1132-NEXT:    v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf
 ; GFX1132-NEXT:    v_readlane_b32 s10, v1, 15
 ; GFX1132-NEXT:    s_mov_b32 exec_lo, s9
 ; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
index 6201d7341898f..5f8909a1e60f8 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
@@ -971,12 +971,12 @@ define void @spill_func(ptr addrspace(1) %arg) #0 {
 ; CHECK-NEXT:    v_writelane_b32 v1, s98, 3
 ; CHECK-NEXT:    v_writelane_b32 v0, s92, 61
 ; CHECK-NEXT:    v_writelane_b32 v1, s99, 4
+; CHECK-NEXT:    s_mov_b32 s49, s12
 ; CHECK-NEXT:    v_writelane_b32 v0, s93, 62
 ; CHECK-NEXT:    v_writelane_b32 v1, s100, 5
-; CHECK-NEXT:    s_mov_b32 s49, s12
+; CHECK-NEXT:    s_cmp_eq_u32 s49, 0
 ; CHECK-NEXT:    v_writelane_b32 v0, s94, 63
 ; CHECK-NEXT:    v_writelane_b32 v1, s101, 6
-; CHECK-NEXT:    s_cmp_eq_u32 s49, 0
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    s_mov_b32 s0, 0
 ; CHECK-NEXT:    ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
index d5cdf584a75de..a14d515688a8b 100644
--- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
+++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
@@ -14,10 +14,10 @@ body: |
   ; CHECK-LABEL: name: def_csr_sgpr
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT:   liveins: $sgpr42, $sgpr43, $sgpr46, $sgpr47, $vgpr0
+  ; CHECK-NEXT:   liveins: $sgpr42, $sgpr43, $sgpr46, $sgpr47
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
   ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; CHECK-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr0
   ; CHECK-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr43, 1, $vgpr0
@@ -26,8 +26,6 @@ body: |
   ; CHECK-NEXT:   S_NOP 0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   liveins: $vgpr0
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr42 = S_MOV_B32 0
   ; CHECK-NEXT:   $sgpr43 = S_MOV_B32 1
   ; CHECK-NEXT:   $sgpr46_sgpr47 = S_MOV_B64 2
diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
index b3f0c1f6dd80e..5f36d5403ebcf 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
@@ -12,15 +12,13 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_lo
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_NOP 0, implicit-def $exec_lo
+    ; CHECK: S_NOP 0, implicit-def $exec_lo
     ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
-    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
     ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
     S_NOP 0, implicit-def $exec_lo
@@ -39,15 +37,13 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_hi
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_NOP 0, implicit-def $exec_hi
+    ; CHECK: S_NOP 0, implicit-def $exec_hi
     ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_hi
     ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
-    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: $exec_hi = S_MOV_B32 killed $sgpr0
     ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
     S_NOP 0, implicit-def $exec_hi
@@ -66,18 +62,16 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_NOP 0, implicit-def $exec
+    ; CHECK: S_NOP 0, implicit-def $exec
     ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
     ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
-    ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1
+    ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
     ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
     S_NOP 0, implicit-def $exec
@@ -99,14 +93,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_lo
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo
+    ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo
     ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
-    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
     ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
     S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_lo
@@ -124,14 +116,12 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_hi
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi
+    ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi
     ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
-    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: $exec_hi = S_MOV_B32 killed $sgpr0
     ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
     S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_hi
@@ -149,17 +139,15 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
+    ; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
     ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
     ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
-    ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1
+    ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
     ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
     S_NOP 0, implicit-def %0:sreg_64, implicit-def %1:sreg_64, implicit-def $exec
diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
index a7529efc78446..1c2436bd6b6cd 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
@@ -13,15 +13,13 @@ body:             |
   bb.0:
 
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_m0
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_NOP 0, implicit-def $m0
+    ; CHECK: S_NOP 0, implicit-def $m0
     ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $m0
     ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
-    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: $m0 = S_MOV_B32 killed $sgpr0
     ; CHECK-NEXT: S_NOP 0
     ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
@@ -45,14 +43,12 @@ body:             |
   bb.0:
 
     ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_m0
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; CHECK: $vgpr0 = IMPLICIT_DEF
     ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
-    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+    ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: $m0 = S_MOV_B32 killed $sgpr0
     ; CHECK-NEXT: S_NOP 0
     ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index d8736c5276a26..7032bec1b533d 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -164,7 +164,7 @@ body:             |
   bb.0:
     liveins: $sgpr30_sgpr31, $sgpr10
     ; GCN-LABEL: name: func_add_constant_to_fi_uniform_live_SCC_i32
-    ; GCN: liveins: $sgpr10, $sgpr30_sgpr31
+    ; GCN: liveins: $sgpr30_sgpr31, $sgpr10
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr10, 4, implicit-def $scc
     ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index 75089b2a8760c..6555ceb3ed338 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -705,8 +705,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -1909,8 +1909,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -3113,8 +3113,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -3833,8 +3833,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -5036,8 +5036,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index c784489fb43b3..6548792180a0e 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -771,8 +771,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0xff800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -1950,8 +1950,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0xff800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -3129,8 +3129,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0xff800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index 70e8c94475d31..6936cdc4d379a 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -771,8 +771,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -1950,8 +1950,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -3129,8 +3129,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
index ce23280e44c46..5cb57703c01d9 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -783,8 +783,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -2013,8 +2013,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -3243,8 +3243,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -4015,8 +4015,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
@@ -5244,8 +5244,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
-; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    v_mbcnt_hi_u32_b32 v1, exec_hi, v1
+; GFX9-DPP-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
 ; GFX9-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX9-DPP-NEXT:    s_not_b64 exec, exec
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
index 9e336a714ca67..eef51acc4e12e 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
@@ -7,13 +7,13 @@ define fastcc i32 @foo() {
   ; CHECK-LABEL: name: foo
   ; CHECK: bb.0 (%ir-block.0):
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $vgpr40, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_WAITCNT 0
   ; CHECK-NEXT:   $sgpr16 = S_MOV_B32 $sgpr33
   ; CHECK-NEXT:   $sgpr33 = S_MOV_B32 $sgpr32
   ; CHECK-NEXT:   $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr17
   ; CHECK-NEXT:   $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc
   ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
@@ -26,24 +26,22 @@ define fastcc i32 @foo() {
   ; CHECK-NEXT:   BUFFER_GL1_INV implicit $exec
   ; CHECK-NEXT:   BUFFER_GL0_INV implicit $exec
   ; CHECK-NEXT:   renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM killed renamable $sgpr16_sgpr17, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
-  ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, killed $vgpr40
-  ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, killed $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40
   ; CHECK-NEXT:   S_WAITCNT 49279
   ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @bar, csr_amdgpu, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit killed $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT:   $vcc_lo = S_MOV_B32 $exec_lo
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1 (%ir-block.1):
   ; CHECK-NEXT:   successors: %bb.2(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $vcc_lo, $vgpr40
+  ; CHECK-NEXT:   liveins: $vcc_lo
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.DummyReturnBlock:
-  ; CHECK-NEXT:   liveins: $vgpr40
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr31 = V_READLANE_B32 $vgpr40, 1
   ; CHECK-NEXT:   $sgpr30 = V_READLANE_B32 $vgpr40, 0
-  ; CHECK-NEXT:   $sgpr4 = V_READLANE_B32 killed $vgpr40, 2
+  ; CHECK-NEXT:   $sgpr4 = V_READLANE_B32 $vgpr40, 2
   ; CHECK-NEXT:   $sgpr5 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
index 11280b5008b52..4734155219d5a 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
@@ -67,7 +67,7 @@ body:             |
     liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
 
     ; GCN-LABEL: name: preserve_all_lanes_wwm_above_args
-    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9
+    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
     ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
@@ -131,7 +131,7 @@ body:             |
     liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
 
     ; GCN-LABEL: name: preserve_inactive_lanes_wwm_args
-    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr10
+    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr10
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
     ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
@@ -200,7 +200,7 @@ body:             |
     liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8
 
     ; GCN-LABEL: name: dont_preserve_v0_v7
-    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr1, $vgpr8
+    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
     ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
index e0a3f9f597c4b..4e8d71a37efbd 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
@@ -35,11 +35,11 @@ body:             |
     liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
 
     ; GCN-LABEL: name: preserve_inactive_wwm
-    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0
+    ; GCN: liveins: $sgpr0, $sgpr35
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
-    ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr0
+    ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr1
     renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
     SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
@@ -162,7 +162,7 @@ body:             |
     liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
 
     ; GCN-LABEL: name: dont_preserve_v0_v7
-    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr1, $vgpr7, $vgpr8, $vgpr9
+    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
     ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
index 8308e6ba4355f..6d1dd18e9279c 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
@@ -57,15 +57,12 @@ body:             |
   ; PEI-LABEL: name: test
   ; PEI: bb.0:
   ; PEI-NEXT:   successors: %bb.1(0x80000000)
-  ; PEI-NEXT:   liveins: $vgpr0
   ; PEI-NEXT: {{  $}}
   ; PEI-NEXT:   renamable $sgpr10 = IMPLICIT_DEF
   ; PEI-NEXT:   $vgpr0 = IMPLICIT_DEF
   ; PEI-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, killed $vgpr0
   ; PEI-NEXT: {{  $}}
   ; PEI-NEXT: bb.1:
-  ; PEI-NEXT:   liveins: $vgpr0
-  ; PEI-NEXT: {{  $}}
   ; PEI-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
   ; PEI-NEXT:   S_ENDPGM 0
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
index 4b787b846fd39..73325045f2053 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -28,16 +28,16 @@ body:             |
   ; GCN-LABEL: name: test_main
   ; GCN: bb.0:
   ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT:   liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $vcc_hi = frame-setup COPY $sgpr33
   ; GCN-NEXT:   $sgpr33 = frame-setup COPY $sgpr32
   ; GCN-NEXT:   $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
   ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr0
   ; GCN-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
   ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr2
@@ -115,18 +115,18 @@ body:             |
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.1:
   ; GCN-NEXT:   successors: %bb.2(0x80000000)
-  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   KILL implicit-def $vcc_lo, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.2:
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
-  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr22 = SI_RESTORE_S32_FROM_VGPR killed $vgpr5, 0
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.3:
-  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 3
   ; GCN-NEXT:   $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 2
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
index 2c4b7a22facf4..59c4b715dd12e 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
@@ -218,7 +218,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: sgpr32_save_clobber_scc_emergency_stack_slot
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -235,7 +235,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -271,7 +271,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: sgpr32_restore_clobber_scc_emergency_stack_slot
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -288,7 +288,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -327,7 +327,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: sgpr64_save_clobber_scc_emergency_stack_slot
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8_sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -345,7 +345,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8_sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -381,7 +381,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: sgpr64_restore_clobber_scc_emergency_stack_slot
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -399,7 +399,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8_sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -441,7 +441,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: sgpr32_save_clobber_scc_emergency_stack_slot_x2
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8, $sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -468,7 +468,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8, $sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -505,7 +505,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: sgpr32_restore_clobber_scc_emergency_stack_slot_x2
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -532,7 +532,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $sgpr8, $sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
index fa82a4efd89f4..9b0f52cb39b01 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
@@ -19,7 +19,7 @@ body:             |
   bb.0:
     liveins: $sgpr30_sgpr31, $vgpr0
     ; GCN-LABEL: name: shift_back_exec_copy_reserved_reg
-    ; GCN: liveins: $vgpr0, $sgpr30_sgpr31
+    ; GCN: liveins: $sgpr30_sgpr31, $vgpr0
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0
     ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
index ce0a4bbbe5576..cc261b0da4a8f 100644
--- a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
+++ b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
@@ -17,10 +17,10 @@ body:             |
   bb.0:
     liveins: $sgpr20, $vgpr1
     ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_reg
-    ; GCN: liveins: $sgpr20, $vgpr0, $vgpr1
+    ; GCN: liveins: $sgpr20, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
@@ -50,11 +50,11 @@ body:             |
   bb.0:
     liveins: $sgpr20, $sgpr21, $vgpr1
     ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg
-    ; GCN: liveins: $sgpr20, $sgpr21, $vgpr0, $vgpr1, $vgpr2
+    ; GCN: liveins: $sgpr20, $sgpr21, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
@@ -89,10 +89,10 @@ body:             |
   bb.0:
     liveins: $sgpr20, $vgpr1
     ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_different_outgoing_reg
-    ; GCN: liveins: $sgpr20, $vgpr1, $vgpr2
+    ; GCN: liveins: $sgpr20, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
@@ -121,10 +121,10 @@ body:             |
   bb.0:
     liveins: $sgpr20, $vgpr1
     ; GCN-LABEL: name: wwm_csr_spill_reload
-    ; GCN: liveins: $sgpr20, $vgpr1, $vgpr2
+    ; GCN: liveins: $sgpr20, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
index dd3572c027c86..e5caa509835c3 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
@@ -413,7 +413,7 @@ body:             |
   ; MUBUF-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot
   ; MUBUF: bb.0:
   ; MUBUF-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -424,7 +424,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -434,7 +434,7 @@ body:             |
   ; GFX9-FLATSCR-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot
   ; GFX9-FLATSCR: bb.0:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -446,7 +446,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -456,7 +456,7 @@ body:             |
   ; GFX10-FLATSCR-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot
   ; GFX10-FLATSCR: bb.0:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -467,7 +467,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -477,7 +477,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -488,7 +488,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -525,7 +525,7 @@ body:             |
   ; MUBUF-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot
   ; MUBUF: bb.0:
   ; MUBUF-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -537,7 +537,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -547,7 +547,7 @@ body:             |
   ; GFX9-FLATSCR-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot
   ; GFX9-FLATSCR: bb.0:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -559,7 +559,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -569,7 +569,7 @@ body:             |
   ; GFX10-FLATSCR-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot
   ; GFX10-FLATSCR: bb.0:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -580,7 +580,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -590,7 +590,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -602,7 +602,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -639,7 +639,7 @@ body:             |
   ; MUBUF-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot
   ; MUBUF: bb.0:
   ; MUBUF-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -652,7 +652,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -662,7 +662,7 @@ body:             |
   ; GFX9-FLATSCR-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot
   ; GFX9-FLATSCR: bb.0:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -674,7 +674,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -684,7 +684,7 @@ body:             |
   ; GFX10-FLATSCR-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot
   ; GFX10-FLATSCR: bb.0:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -695,7 +695,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -705,7 +705,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -718,7 +718,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -755,7 +755,7 @@ body:             |
   ; MUBUF-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot
   ; MUBUF: bb.0:
   ; MUBUF-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -766,7 +766,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -776,7 +776,7 @@ body:             |
   ; GFX9-FLATSCR-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot
   ; GFX9-FLATSCR: bb.0:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -788,7 +788,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -798,7 +798,7 @@ body:             |
   ; GFX10-FLATSCR-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot
   ; GFX10-FLATSCR: bb.0:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -809,7 +809,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -819,7 +819,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -830,7 +830,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -867,7 +867,7 @@ body:             |
   ; MUBUF-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot
   ; MUBUF: bb.0:
   ; MUBUF-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -879,7 +879,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -889,7 +889,7 @@ body:             |
   ; GFX9-FLATSCR-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot
   ; GFX9-FLATSCR: bb.0:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -901,7 +901,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -911,7 +911,7 @@ body:             |
   ; GFX10-FLATSCR-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot
   ; GFX10-FLATSCR: bb.0:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -922,7 +922,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -932,7 +932,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -944,7 +944,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -981,7 +981,7 @@ body:             |
   ; MUBUF-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot
   ; MUBUF: bb.0:
   ; MUBUF-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -994,7 +994,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -1004,7 +1004,7 @@ body:             |
   ; GFX9-FLATSCR-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot
   ; GFX9-FLATSCR: bb.0:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -1016,7 +1016,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -1026,7 +1026,7 @@ body:             |
   ; GFX10-FLATSCR-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot
   ; GFX10-FLATSCR: bb.0:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -1037,7 +1037,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -1047,7 +1047,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -1060,7 +1060,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -1200,7 +1200,7 @@ body:             |
   ; MUBUF-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot
   ; MUBUF: bb.0:
   ; MUBUF-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -1211,7 +1211,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
   ; MUBUF-NEXT:   successors: %bb.2(0x80000000)
-  ; MUBUF-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_NOP 0
   ; MUBUF-NEXT: {{  $}}
@@ -1221,7 +1221,7 @@ body:             |
   ; GFX9-FLATSCR-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot
   ; GFX9-FLATSCR: bb.0:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -1235,7 +1235,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_NOP 0
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -1245,7 +1245,7 @@ body:             |
   ; GFX10-FLATSCR-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot
   ; GFX10-FLATSCR: bb.0:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -1259,7 +1259,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_NOP 0
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -1269,7 +1269,7 @@ body:             |
   ; VMEM-GFX8-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot
   ; VMEM-GFX8: bb.0:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -1280,7 +1280,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
   ; VMEM-GFX8-NEXT:   successors: %bb.2(0x80000000)
-  ; VMEM-GFX8-NEXT:   liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+  ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_NOP 0
   ; VMEM-GFX8-NEXT: {{  $}}

>From 69b2a02913348f054b38c08f3a8af1d6087cb054 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: Tue, 28 May 2024 19:41:58 +0530
Subject: [PATCH 13/13] Fix header inclusion and rename two functions.

---
 llvm/include/llvm/CodeGen/Passes.h         |  6 +++---
 llvm/include/llvm/CodeGen/RegAllocCommon.h |  4 ++--
 llvm/lib/CodeGen/RegAllocBase.cpp          |  2 +-
 llvm/lib/CodeGen/RegAllocBase.h            |  6 +++---
 llvm/lib/CodeGen/RegAllocBasic.cpp         | 10 ++++------
 llvm/lib/CodeGen/RegAllocFast.cpp          | 10 +++++-----
 llvm/lib/CodeGen/RegAllocGreedy.cpp        | 12 +++++-------
 llvm/lib/CodeGen/RegAllocGreedy.h          |  2 +-
 8 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index f850767270a4f..cafb9781698a2 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -205,20 +205,20 @@ namespace llvm {
   /// possible. It is best suited for debug code where live ranges are short.
   ///
   FunctionPass *createFastRegisterAllocator();
-  FunctionPass *createFastRegisterAllocator(RegClassFilterFunc F,
+  FunctionPass *createFastRegisterAllocator(RegAllocFilterFunc F,
                                             bool ClearVirtRegs);
 
   /// BasicRegisterAllocation Pass - This pass implements a degenerate global
   /// register allocator using the basic regalloc framework.
   ///
   FunctionPass *createBasicRegisterAllocator();
-  FunctionPass *createBasicRegisterAllocator(RegClassFilterFunc F);
+  FunctionPass *createBasicRegisterAllocator(RegAllocFilterFunc F);
 
   /// Greedy register allocation pass - This pass implements a global register
   /// allocator for optimized builds.
   ///
   FunctionPass *createGreedyRegisterAllocator();
-  FunctionPass *createGreedyRegisterAllocator(RegClassFilterFunc F);
+  FunctionPass *createGreedyRegisterAllocator(RegAllocFilterFunc F);
 
   /// PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean
   /// Quadratic Prograaming (PBQP) based register allocator.
diff --git a/llvm/include/llvm/CodeGen/RegAllocCommon.h b/llvm/include/llvm/CodeGen/RegAllocCommon.h
index f3423083eef3a..e6fb468dbad9d 100644
--- a/llvm/include/llvm/CodeGen/RegAllocCommon.h
+++ b/llvm/include/llvm/CodeGen/RegAllocCommon.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_CODEGEN_REGALLOCCOMMON_H
 #define LLVM_CODEGEN_REGALLOCCOMMON_H
 
+#include "llvm/CodeGen/Register.h"
 #include <functional>
-#include <llvm/CodeGen/Register.h>
 
 namespace llvm {
 
@@ -20,7 +20,7 @@ class MachineRegisterInfo;
 
 typedef std::function<bool(const TargetRegisterInfo &TRI,
                            const MachineRegisterInfo &MRI, const Register Reg)>
-    RegClassFilterFunc;
+    RegAllocFilterFunc;
 
 /// Default register class filter function for register allocation. All virtual
 /// registers should be allocated.
diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index a4645ed93029d..0e6b5cfb938f2 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -181,7 +181,7 @@ void RegAllocBase::enqueue(const LiveInterval *LI) {
   if (VRM->hasPhys(Reg))
     return;
 
-  if (ShouldAllocateClass(*TRI, *MRI, Reg)) {
+  if (ShouldAllocateRegister(*TRI, *MRI, Reg)) {
     LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n');
     enqueueImpl(LI);
   } else {
diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h
index a8bf305a50c98..e08e7cfddeb7a 100644
--- a/llvm/lib/CodeGen/RegAllocBase.h
+++ b/llvm/lib/CodeGen/RegAllocBase.h
@@ -68,7 +68,7 @@ class RegAllocBase {
   LiveIntervals *LIS = nullptr;
   LiveRegMatrix *Matrix = nullptr;
   RegisterClassInfo RegClassInfo;
-  const RegClassFilterFunc ShouldAllocateClass;
+  const RegAllocFilterFunc ShouldAllocateRegister;
 
   /// Inst which is a def of an original reg and whose defs are already all
   /// dead after remat is saved in DeadRemats. The deletion of such inst is
@@ -76,8 +76,8 @@ class RegAllocBase {
   /// always available for the remat of all the siblings of the original reg.
   SmallPtrSet<MachineInstr *, 32> DeadRemats;
 
-  RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses) :
-    ShouldAllocateClass(F) {}
+  RegAllocBase(const RegAllocFilterFunc F = allocateAllRegClasses)
+      : ShouldAllocateRegister(F) {}
 
   virtual ~RegAllocBase() = default;
 
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index 5bd3b126aa166..fb7455ef7a85a 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -74,7 +74,7 @@ class RABasic : public MachineFunctionPass,
   void LRE_WillShrinkVirtReg(Register) override;
 
 public:
-  RABasic(const RegClassFilterFunc F = allocateAllRegClasses);
+  RABasic(const RegAllocFilterFunc F = allocateAllRegClasses);
 
   /// Return the pass name.
   StringRef getPassName() const override { return "Basic Register Allocator"; }
@@ -168,10 +168,8 @@ void RABasic::LRE_WillShrinkVirtReg(Register VirtReg) {
   enqueue(&LI);
 }
 
-RABasic::RABasic(RegClassFilterFunc F):
-  MachineFunctionPass(ID),
-  RegAllocBase(F) {
-}
+RABasic::RABasic(RegAllocFilterFunc F)
+    : MachineFunctionPass(ID), RegAllocBase(F) {}
 
 void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
@@ -334,6 +332,6 @@ FunctionPass* llvm::createBasicRegisterAllocator() {
   return new RABasic();
 }
 
-FunctionPass* llvm::createBasicRegisterAllocator(RegClassFilterFunc F) {
+FunctionPass *llvm::createBasicRegisterAllocator(RegAllocFilterFunc F) {
   return new RABasic(F);
 }
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index f6419daba6a2d..f48e0d3fc9145 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -178,9 +178,9 @@ class RegAllocFast : public MachineFunctionPass {
 public:
   static char ID;
 
-  RegAllocFast(const RegClassFilterFunc F = allocateAllRegClasses,
+  RegAllocFast(const RegAllocFilterFunc F = allocateAllRegClasses,
                bool ClearVirtRegs_ = true)
-      : MachineFunctionPass(ID), ShouldAllocateClass(F),
+      : MachineFunctionPass(ID), ShouldAllocateRegister(F),
         StackSlotForVirtReg(-1), ClearVirtRegs(ClearVirtRegs_) {}
 
 private:
@@ -189,7 +189,7 @@ class RegAllocFast : public MachineFunctionPass {
   const TargetRegisterInfo *TRI = nullptr;
   const TargetInstrInfo *TII = nullptr;
   RegisterClassInfo RegClassInfo;
-  const RegClassFilterFunc ShouldAllocateClass;
+  const RegAllocFilterFunc ShouldAllocateRegister;
 
   /// Basic block currently being allocated.
   MachineBasicBlock *MBB = nullptr;
@@ -417,7 +417,7 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
 
 bool RegAllocFast::shouldAllocateRegister(const Register Reg) const {
   assert(Reg.isVirtual());
-  return ShouldAllocateClass(*TRI, *MRI, Reg);
+  return ShouldAllocateRegister(*TRI, *MRI, Reg);
 }
 
 void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
@@ -1772,7 +1772,7 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
 
 FunctionPass *llvm::createFastRegisterAllocator() { return new RegAllocFast(); }
 
-FunctionPass *llvm::createFastRegisterAllocator(RegClassFilterFunc Ftor,
+FunctionPass *llvm::createFastRegisterAllocator(RegAllocFilterFunc Ftor,
                                                 bool ClearVirtRegs) {
   return new RegAllocFast(Ftor, ClearVirtRegs);
 }
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index b3bf1899ceeaf..935a48ee6596b 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -192,14 +192,12 @@ FunctionPass* llvm::createGreedyRegisterAllocator() {
   return new RAGreedy();
 }
 
-FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
+FunctionPass *llvm::createGreedyRegisterAllocator(RegAllocFilterFunc Ftor) {
   return new RAGreedy(Ftor);
 }
 
-RAGreedy::RAGreedy(RegClassFilterFunc F):
-  MachineFunctionPass(ID),
-  RegAllocBase(F) {
-}
+RAGreedy::RAGreedy(RegAllocFilterFunc F)
+    : MachineFunctionPass(ID), RegAllocBase(F) {}
 
 void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
@@ -2308,7 +2306,7 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
 
     // This may be a skipped register.
     if (!VRM->hasPhys(Reg)) {
-      assert(!ShouldAllocateClass(*TRI, *MRI, Reg) &&
+      assert(!ShouldAllocateRegister(*TRI, *MRI, Reg) &&
              "We have an unallocated variable which should have been handled");
       continue;
     }
@@ -2698,7 +2696,7 @@ bool RAGreedy::hasVirtRegAlloc() {
     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
     if (!RC)
       continue;
-    if (ShouldAllocateClass(*TRI, *MRI, Reg))
+    if (ShouldAllocateRegister(*TRI, *MRI, Reg))
       return true;
   }
 
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h
index 1941643bba9e6..83f73308cd2c0 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -281,7 +281,7 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass,
   bool ReverseLocalAssignment = false;
 
 public:
-  RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
+  RAGreedy(const RegAllocFilterFunc F = allocateAllRegClasses);
 
   /// Return the pass name.
   StringRef getPassName() const override { return "Greedy Register Allocator"; }


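For context, a minimal sketch of what a filter written against the renamed
RegAllocFilterFunc prototype could look like. This is an illustration, not
code from the patch: the "wide class" policy and the 64-register threshold
are arbitrary stand-ins for whatever per-register query a target actually
needs, the point being that the callback now receives the
MachineRegisterInfo and the virtual Register itself rather than only a
register class.

// Sketch only (not part of this patch): a per-register filter using the
// RegAllocFilterFunc prototype introduced above.
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// Illustrative policy: allocate a virtual register in this run only when its
// register class has more than 64 physical members. TRI is also available
// here for class- or target-level queries if a real filter needs them.
static bool onlyAllocateWideClassRegs(const TargetRegisterInfo &TRI,
                                      const MachineRegisterInfo &MRI,
                                      const Register Reg) {
  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
  return RC && RC->getNumRegs() > 64;
}

// A target's pass configuration could then hand the predicate to one of the
// factories touched above, e.g.:
//   FunctionPass *RA = createGreedyRegisterAllocator(onlyAllocateWideClassRegs);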

More information about the llvm-commits mailing list