[llvm] 4d2faf0 - [AMDGPU][SIFrameLowering] Mark VGPR used for AGPR spills as reserved

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 16 12:09:52 PST 2022


Author: Jeffrey Byrnes
Date: 2022-12-16T12:00:51-08:00
New Revision: 4d2faf043bbbc19801bafb413b9090721e873be4

URL: https://github.com/llvm/llvm-project/commit/4d2faf043bbbc19801bafb413b9090721e873be4
DIFF: https://github.com/llvm/llvm-project/commit/4d2faf043bbbc19801bafb413b9090721e873be4.diff

LOG: [AMDGPU][SIFrameLowering] Mark VGPR used for AGPR spills as reserved

Presently, there is an issue on MI100 (and probably other architectures) where the VGPR used for AGPR copies clobbers the VGPR used for AGPR spills. AFAICT this is because in processFunctionBeforeFrameIndicesReplaced we think the VGPR used for the AGPR spill is unused. This patch aims to correct that. This is a WIP while I work out issues with producing a good test. For now, I'm curious whether this is generally a good or bad idea.

Differential Revision: https://reviews.llvm.org/D139673

Added: 
    llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir

Modified: 
    llvm/include/llvm/CodeGen/MachineRegisterInfo.h
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
    llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index a51f1c753cd02..75a444f796567 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -900,6 +900,18 @@ class MachineRegisterInfo {
   /// of reserved registers before allocation begins.
   void freezeReservedRegs(const MachineFunction&);
 
+  /// reserveReg -- Mark a register as reserved so checks like isAllocatable 
+  /// will not suggest using it. This should not be used during the middle
+  /// of a function walk, or when liveness info is available.
+  void reserveReg(MCRegister PhysReg, const TargetRegisterInfo *TRI) {
+    assert(reservedRegsFrozen() &&
+           "Reserved registers haven't been frozen yet. ");
+    MCRegAliasIterator R(PhysReg, TRI, true);
+
+    for (; R.isValid(); ++R)
+      ReservedRegs.set(*R);
+  }
+
   /// reservedRegsFrozen - Returns true after freezeReservedRegs() was called
   /// to ensure the set of reserved registers stays constant.
   bool reservedRegsFrozen() const {

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a7f253b352c11..e20abf4c2ea25 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -425,6 +425,7 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
 
     OtherUsedRegs.set(*NextSpillReg);
     SpillRegs.push_back(*NextSpillReg);
+    MRI.reserveReg(*NextSpillReg, TRI);
     Spill.Lanes[I] = *NextSpillReg++;
   }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
index 152e73d1b803b..8e0fa094356d1 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
@@ -269,10 +269,10 @@ body:             |
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
+  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -762,11 +762,11 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -1286,11 +1286,11 @@ body:             |
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
+  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -1782,11 +1782,11 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -2308,12 +2308,12 @@ body:             |
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 8, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
+  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 8, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -2807,11 +2807,11 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -3329,10 +3329,10 @@ body:             |
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -3822,11 +3822,11 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $agpr0, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $agpr0, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -4345,11 +4345,11 @@ body:             |
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -4841,11 +4841,11 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -5365,12 +5365,12 @@ body:             |
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -5864,11 +5864,11 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:

diff  --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index 03bcd58786139..533e85e42ec86 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -13,133 +13,133 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def v[0:31] a[0:15]
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a15
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a15
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a31, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a14
+; GFX908-NEXT:    v_accvgpr_write_b32 a31, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a14
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a30, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a13
+; GFX908-NEXT:    v_accvgpr_write_b32 a30, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a13
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a29, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a12
+; GFX908-NEXT:    v_accvgpr_write_b32 a29, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a12
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a28, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a11
+; GFX908-NEXT:    v_accvgpr_write_b32 a28, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a11
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a27, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a10
+; GFX908-NEXT:    v_accvgpr_write_b32 a27, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a10
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a26, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a9
+; GFX908-NEXT:    v_accvgpr_write_b32 a26, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a9
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a25, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a8
+; GFX908-NEXT:    v_accvgpr_write_b32 a25, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a8
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a24, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a7
+; GFX908-NEXT:    v_accvgpr_write_b32 a24, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a7
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a23, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a6
+; GFX908-NEXT:    v_accvgpr_write_b32 a23, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a6
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a22, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a5
+; GFX908-NEXT:    v_accvgpr_write_b32 a22, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a5
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a21, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a4
+; GFX908-NEXT:    v_accvgpr_write_b32 a21, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a4
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a20, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a3
+; GFX908-NEXT:    v_accvgpr_write_b32 a20, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a3
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a19, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a2
+; GFX908-NEXT:    v_accvgpr_write_b32 a19, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a2
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a18, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a1
+; GFX908-NEXT:    v_accvgpr_write_b32 a18, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a1
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a17, v34
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a0
+; GFX908-NEXT:    v_accvgpr_write_b32 a17, v39
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a0
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a16, v34
+; GFX908-NEXT:    v_accvgpr_write_b32 a16, v39
 ; GFX908-NEXT:    s_nop 0
 ; GFX908-NEXT:    v_mfma_f32_16x16x1f32 a[0:15], v33, v32, a[16:31]
 ; GFX908-NEXT:    s_nop 7
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a0 ; Reload Reuse
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a0 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_read_b32 v38, a11 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_read_b32 v37, a12 ; Reload Reuse
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a1 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a1 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_read_b32 v36, a13 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_read_b32 v35, a14 ; Reload Reuse
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a2 ; Reload Reuse
-; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a3 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a2 ; Reload Reuse
+; GFX908-NEXT:    v_accvgpr_read_b32 v34, a15 ; Reload Reuse
+; GFX908-NEXT:    s_nop 0
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a3 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a4 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a4 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a5 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a5 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a6 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a6 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a7 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a7 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a8 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a8 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a9 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a9 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a10 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a10 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a15 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; copy
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a1
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a1
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a16, v34
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a16, v39
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a0, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a0, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a1, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a1, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a2, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a2, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a3, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a3, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a4, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a4, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a5, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a5, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a6, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a6, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a7, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a7, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a8, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a8, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a9, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a9, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a10, v34 ; Reload Reuse
+; GFX908-NEXT:    v_accvgpr_write_b32 a10, v39 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_write_b32 a11, v38 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_write_b32 a12, v37 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_write_b32 a13, v36 ; Reload Reuse
@@ -863,133 +863,133 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 {
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; def v[0:31] s[0:15]
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_mov_b32_e32 v34, s15
+; GFX908-NEXT:    v_mov_b32_e32 v39, s15
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a31, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s14
+; GFX908-NEXT:    v_accvgpr_write_b32 a31, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s14
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a30, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s13
+; GFX908-NEXT:    v_accvgpr_write_b32 a30, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s13
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a29, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s12
+; GFX908-NEXT:    v_accvgpr_write_b32 a29, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s12
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a28, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s11
+; GFX908-NEXT:    v_accvgpr_write_b32 a28, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s11
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a27, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s10
+; GFX908-NEXT:    v_accvgpr_write_b32 a27, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s10
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a26, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s9
+; GFX908-NEXT:    v_accvgpr_write_b32 a26, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s9
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a25, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s8
+; GFX908-NEXT:    v_accvgpr_write_b32 a25, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s8
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a24, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s7
+; GFX908-NEXT:    v_accvgpr_write_b32 a24, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s7
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a23, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s6
+; GFX908-NEXT:    v_accvgpr_write_b32 a23, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s6
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a22, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s5
+; GFX908-NEXT:    v_accvgpr_write_b32 a22, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s5
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a21, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s4
+; GFX908-NEXT:    v_accvgpr_write_b32 a21, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s4
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a20, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s3
+; GFX908-NEXT:    v_accvgpr_write_b32 a20, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s3
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a19, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s2
+; GFX908-NEXT:    v_accvgpr_write_b32 a19, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s2
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a18, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s1
+; GFX908-NEXT:    v_accvgpr_write_b32 a18, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s1
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a17, v34
-; GFX908-NEXT:    v_mov_b32_e32 v34, s0
+; GFX908-NEXT:    v_accvgpr_write_b32 a17, v39
+; GFX908-NEXT:    v_mov_b32_e32 v39, s0
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a16, v34
+; GFX908-NEXT:    v_accvgpr_write_b32 a16, v39
 ; GFX908-NEXT:    s_nop 0
 ; GFX908-NEXT:    v_mfma_f32_16x16x1f32 a[0:15], v33, v32, a[16:31]
 ; GFX908-NEXT:    s_nop 7
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a0 ; Reload Reuse
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a0 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_read_b32 v38, a11 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_read_b32 v37, a12 ; Reload Reuse
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a1 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a1 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_read_b32 v36, a13 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_read_b32 v35, a14 ; Reload Reuse
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a2 ; Reload Reuse
-; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a3 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a2 ; Reload Reuse
+; GFX908-NEXT:    v_accvgpr_read_b32 v34, a15 ; Reload Reuse
+; GFX908-NEXT:    s_nop 0
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a3 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a4 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a4 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a5 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a5 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a6 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a6 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a7 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a7 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a8 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a8 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a9 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a9 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a10 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a10 ; Reload Reuse
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a15 ; Reload Reuse
+; GFX908-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; copy
 ; GFX908-NEXT:    ;;#ASMEND
-; GFX908-NEXT:    v_accvgpr_read_b32 v34, a1
+; GFX908-NEXT:    v_accvgpr_read_b32 v39, a1
 ; GFX908-NEXT:    s_nop 1
-; GFX908-NEXT:    v_accvgpr_write_b32 a32, v34
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a32, v39
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a0, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a0, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a1, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a1, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a2, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a2, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a3, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a3, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a4, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a4, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a5, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a5, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a6, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a6, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a7, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a7, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a8, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a8, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a9, v34 ; Reload Reuse
-; GFX908-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX908-NEXT:    v_accvgpr_write_b32 a9, v39 ; Reload Reuse
+; GFX908-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
-; GFX908-NEXT:    v_accvgpr_write_b32 a10, v34 ; Reload Reuse
+; GFX908-NEXT:    v_accvgpr_write_b32 a10, v39 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_write_b32 a11, v38 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_write_b32 a12, v37 ; Reload Reuse
 ; GFX908-NEXT:    v_accvgpr_write_b32 a13, v36 ; Reload Reuse

diff  --git a/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir b/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir
new file mode 100644
index 0000000000000..7efdbcba81a97
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir
@@ -0,0 +1,417 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=prologepilog %s -o - | FileCheck --check-prefix=GFX908 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -start-before=prologepilog %s -o - | FileCheck --check-prefix=GFX90A %s
+
+--- |
+
+  define amdgpu_kernel void @test_spill() #0 {
+  ; GFX908-LABEL: test_spill:
+  ; GFX908:       ; %bb.0:
+  ; GFX908-NEXT:    ; implicit-def: $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111
+  ; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a96
+  ; GFX908-NEXT:    ; implicit-def: $vgpr0
+  ; GFX908-NEXT:    ; implicit-def: $vgpr1
+  ; GFX908-NEXT:    ; implicit-def: $vgpr2
+  ; GFX908-NEXT:    ; implicit-def: $vgpr3
+  ; GFX908-NEXT:    ; implicit-def: $vgpr4
+  ; GFX908-NEXT:    ; implicit-def: $vgpr5
+  ; GFX908-NEXT:    ; implicit-def: $vgpr6
+  ; GFX908-NEXT:    ; implicit-def: $vgpr7
+  ; GFX908-NEXT:    ; implicit-def: $vgpr8
+  ; GFX908-NEXT:    ; implicit-def: $vgpr9
+  ; GFX908-NEXT:    ; implicit-def: $vgpr10
+  ; GFX908-NEXT:    ; implicit-def: $vgpr11
+  ; GFX908-NEXT:    ; implicit-def: $vgpr12
+  ; GFX908-NEXT:    ; implicit-def: $vgpr13
+  ; GFX908-NEXT:    ; implicit-def: $vgpr14
+  ; GFX908-NEXT:    ; implicit-def: $vgpr15
+  ; GFX908-NEXT:    ; implicit-def: $vgpr16
+  ; GFX908-NEXT:    ; implicit-def: $vgpr17
+  ; GFX908-NEXT:    ; implicit-def: $vgpr18
+  ; GFX908-NEXT:    ; implicit-def: $vgpr19
+  ; GFX908-NEXT:    ; implicit-def: $vgpr20
+  ; GFX908-NEXT:    ; implicit-def: $vgpr21
+  ; GFX908-NEXT:    ; implicit-def: $vgpr22
+  ; GFX908-NEXT:    ; implicit-def: $vgpr23
+  ; GFX908-NEXT:    ; implicit-def: $vgpr24
+  ; GFX908-NEXT:    ; implicit-def: $vgpr25
+  ; GFX908-NEXT:    ; implicit-def: $vgpr26
+  ; GFX908-NEXT:    ; implicit-def: $vgpr27
+  ; GFX908-NEXT:    ; implicit-def: $vgpr28
+  ; GFX908-NEXT:    ; implicit-def: $vgpr29
+  ; GFX908-NEXT:    ; implicit-def: $vgpr30
+  ; GFX908-NEXT:    ; implicit-def: $vgpr31
+  ; GFX908-NEXT:    ; implicit-def: $vgpr32
+  ; GFX908-NEXT:    ; implicit-def: $vgpr33
+  ; GFX908-NEXT:    ; implicit-def: $vgpr34
+  ; GFX908-NEXT:    ; implicit-def: $vgpr35
+  ; GFX908-NEXT:    ; implicit-def: $vgpr36
+  ; GFX908-NEXT:    ; implicit-def: $vgpr37
+  ; GFX908-NEXT:    ; implicit-def: $vgpr38
+  ; GFX908-NEXT:    ; implicit-def: $vgpr39
+  ; GFX908-NEXT:    ; implicit-def: $vgpr40
+  ; GFX908-NEXT:    ; implicit-def: $vgpr41
+  ; GFX908-NEXT:    ; implicit-def: $vgpr42
+  ; GFX908-NEXT:    ; implicit-def: $vgpr43
+  ; GFX908-NEXT:    ; implicit-def: $vgpr44
+  ; GFX908-NEXT:    ; implicit-def: $vgpr45
+  ; GFX908-NEXT:    ; implicit-def: $vgpr46
+  ; GFX908-NEXT:    ; implicit-def: $vgpr47
+  ; GFX908-NEXT:    ; implicit-def: $vgpr48
+  ; GFX908-NEXT:    ; implicit-def: $vgpr49
+  ; GFX908-NEXT:    ; implicit-def: $vgpr50
+  ; GFX908-NEXT:    ; implicit-def: $vgpr51
+  ; GFX908-NEXT:    ; implicit-def: $vgpr52
+  ; GFX908-NEXT:    ; implicit-def: $vgpr53
+  ; GFX908-NEXT:    ; implicit-def: $vgpr54
+  ; GFX908-NEXT:    ; implicit-def: $vgpr55
+  ; GFX908-NEXT:    ; implicit-def: $vgpr56
+  ; GFX908-NEXT:    ; implicit-def: $vgpr57
+  ; GFX908-NEXT:    ; implicit-def: $vgpr58
+  ; GFX908-NEXT:    ; implicit-def: $vgpr59
+  ; GFX908-NEXT:    ; implicit-def: $vgpr60
+  ; GFX908-NEXT:    ; implicit-def: $vgpr61
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a64, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a97
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a65, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a98
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a66, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a99
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a67, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a100
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a68, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a101
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a69, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a102
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a70, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a103
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a71, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a104
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a72, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a105
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a73, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a106
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a74, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a107
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a75, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a108
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a76, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a109
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a77, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a110
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a78, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a111
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    v_accvgpr_write_b32 a79, v63
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a96 ; Reload Reuse
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v62, a111 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 0
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a97 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a98 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a99 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a100 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a101 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a102 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a103 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a104 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a105 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a106 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a107 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a108 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a109 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a110 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a96 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a97 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a98 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a99 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a100 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a101 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a102 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a103 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a104 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a105 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a106 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a107 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a108 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a109 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a110 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+  ; GFX908-NEXT:    v_accvgpr_read_b32 v63, a111 ; Reload Reuse
+  ; GFX908-NEXT:    s_nop 1
+  ; GFX908-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+  ;
+  ; GFX90A-LABEL: test_spill:
+  ; GFX90A:       ; %bb.0:
+  ; GFX90A-NEXT:    ; implicit-def: $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111
+  ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a64, a96
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a65, a97
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a66, a98
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a67, a99
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a68, a100
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a69, a101
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a70, a102
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a71, a103
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a72, a104
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a73, a105
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a74, a106
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a75, a107
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a76, a108
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a77, a109
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a78, a110
+  ; GFX90A-NEXT:    v_accvgpr_mov_b32 a79, a111
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v77, a96 ; Reload Reuse
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr0
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr1
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr2
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr3
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr4
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr5
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr6
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr7
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr8
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr9
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr10
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr11
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr12
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr13
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr14
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr15
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr16
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr17
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr18
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr19
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr20
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr21
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr22
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr23
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr24
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr25
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr26
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr27
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr28
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr29
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr30
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr31
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr32
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr33
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr34
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr35
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr36
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr37
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr38
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr39
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr40
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr41
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr42
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr43
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr44
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr45
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr46
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr47
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr48
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr49
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr50
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr51
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr52
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr53
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr54
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr55
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr56
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr57
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr58
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr59
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr60
+  ; GFX90A-NEXT:    ; implicit-def: $vgpr61
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v76, a97 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v75, a98 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v74, a99 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v73, a100 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v72, a101 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v71, a102 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v70, a103 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v69, a104 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v68, a105 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v67, a106 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v66, a107 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v65, a108 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v64, a109 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v63, a110 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v62, a111 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v93, a96 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v92, a97 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v91, a98 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v90, a99 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v89, a100 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v88, a101 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v87, a102 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v86, a103 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v85, a104 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v84, a105 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v83, a106 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v82, a107 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v81, a108 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v80, a109 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v79, a110 ; Reload Reuse
+  ; GFX90A-NEXT:    v_accvgpr_read_b32 v78, a111 ; Reload Reuse
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="4,4" }
+
+...
+---
+# test_spill: defines vgpr0-vgpr61 and the 16-register AGPR tuple
+# agpr96-agpr111, copies that tuple into agpr64-agpr79, and then spills the
+# same tuple into two separate 64-byte spill slots.
+name:            test_spill
+tracksRegLiveness: true
+stack:
+  # One 64-byte spill slot per SI_SPILL_AV512_SAVE in the body.
+  # NOTE(review): both slots are declared at offset 0; presumably the final
+  # offsets are assigned when the frame is laid out -- confirm.
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 4 }
+  - { id: 1, name: '', type: spill-slot, offset: 0, size: 64, alignment: 4 }
+
+machineFunctionInfo:
+  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: '$sgpr32'
+  hasSpilledVGPRs: true
+body:             |
+  bb.0:
+    ; Define the AGPR tuple that is copied and spilled below.
+    $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111 = IMPLICIT_DEF
+    ; Define vgpr0-vgpr61 one register at a time.
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr7 = IMPLICIT_DEF
+    $vgpr8 = IMPLICIT_DEF
+    $vgpr9 = IMPLICIT_DEF
+    $vgpr10 = IMPLICIT_DEF
+    $vgpr11 = IMPLICIT_DEF
+    $vgpr12 = IMPLICIT_DEF
+    $vgpr13 = IMPLICIT_DEF
+    $vgpr14 = IMPLICIT_DEF
+    $vgpr15 = IMPLICIT_DEF
+    $vgpr16 = IMPLICIT_DEF
+    $vgpr17 = IMPLICIT_DEF
+    $vgpr18 = IMPLICIT_DEF
+    $vgpr19 = IMPLICIT_DEF
+    $vgpr20 = IMPLICIT_DEF
+    $vgpr21 = IMPLICIT_DEF
+    $vgpr22 = IMPLICIT_DEF
+    $vgpr23 = IMPLICIT_DEF
+    $vgpr24 = IMPLICIT_DEF
+    $vgpr25 = IMPLICIT_DEF
+    $vgpr26 = IMPLICIT_DEF
+    $vgpr27 = IMPLICIT_DEF
+    $vgpr28 = IMPLICIT_DEF
+    $vgpr29 = IMPLICIT_DEF
+    $vgpr30 = IMPLICIT_DEF
+    $vgpr31 = IMPLICIT_DEF
+    $vgpr32 = IMPLICIT_DEF
+    $vgpr33 = IMPLICIT_DEF
+    $vgpr34 = IMPLICIT_DEF
+    $vgpr35 = IMPLICIT_DEF
+    $vgpr36 = IMPLICIT_DEF
+    $vgpr37 = IMPLICIT_DEF
+    $vgpr38 = IMPLICIT_DEF
+    $vgpr39 = IMPLICIT_DEF
+    $vgpr40 = IMPLICIT_DEF
+    $vgpr41 = IMPLICIT_DEF
+    $vgpr42 = IMPLICIT_DEF
+    $vgpr43 = IMPLICIT_DEF
+    $vgpr44 = IMPLICIT_DEF
+    $vgpr45 = IMPLICIT_DEF
+    $vgpr46 = IMPLICIT_DEF
+    $vgpr47 = IMPLICIT_DEF
+    $vgpr48 = IMPLICIT_DEF
+    $vgpr49 = IMPLICIT_DEF
+    $vgpr50 = IMPLICIT_DEF
+    $vgpr51 = IMPLICIT_DEF
+    $vgpr52 = IMPLICIT_DEF
+    $vgpr53 = IMPLICIT_DEF
+    $vgpr54 = IMPLICIT_DEF
+    $vgpr55 = IMPLICIT_DEF
+    $vgpr56 = IMPLICIT_DEF
+    $vgpr57 = IMPLICIT_DEF
+    $vgpr58 = IMPLICIT_DEF
+    $vgpr59 = IMPLICIT_DEF
+    $vgpr60 = IMPLICIT_DEF
+    $vgpr61 = IMPLICIT_DEF
+
+    ; Copy the whole 16-register AGPR tuple into agpr64-agpr79.
+    $agpr64_agpr65_agpr66_agpr67_agpr68_agpr69_agpr70_agpr71_agpr72_agpr73_agpr74_agpr75_agpr76_agpr77_agpr78_agpr79 = COPY $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, implicit $exec
+    ; First spill of the tuple, into %stack.0.
+    ; NOTE(review): the source is marked killed here but is read again by the
+    ; next instruction -- confirm this is intended.
+    SI_SPILL_AV512_SAVE killed $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
+    ; Second spill of the same tuple, into %stack.1. The memory operand names
+    ; %stack.1 so that it agrees with the frame-index operand.
+    SI_SPILL_AV512_SAVE $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5)
+...


        


More information about the llvm-commits mailing list