[llvm] 8fcb4fa - [RegScavenger] Change scavengeRegister to pick registers in allocation order

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Fri May 19 13:42:07 PDT 2023


Author: Jay Foad
Date: 2023-05-19T21:39:19+01:00
New Revision: 8fcb4fa847f9b5c63520804f8035292fbe24fd75

URL: https://github.com/llvm/llvm-project/commit/8fcb4fa847f9b5c63520804f8035292fbe24fd75
DIFF: https://github.com/llvm/llvm-project/commit/8fcb4fa847f9b5c63520804f8035292fbe24fd75.diff

LOG: [RegScavenger] Change scavengeRegister to pick registers in allocation order

This matches what scavengeRegisterBackwards does.

This is in preparation for converting most uses of scavengeRegister to
scavengeRegisterBackwards, to reduce test case churn when that lands and
to help with bisection if anything goes wrong.

Differential Revision: https://reviews.llvm.org/D150792

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/RegisterScavenging.h
    llvm/lib/CodeGen/RegisterScavenging.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
    llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
    llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
    llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
    llvm/test/CodeGen/AMDGPU/flat-scratch.ll
    llvm/test/CodeGen/AMDGPU/frame-index.mir
    llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
    llvm/test/CodeGen/AMDGPU/load-lo16.ll
    llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
    llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
    llvm/test/CodeGen/AMDGPU/scratch-simple.ll
    llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
    llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
    llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/RegisterScavenging.h b/llvm/include/llvm/CodeGen/RegisterScavenging.h
index 94a127ebee184..1f61baaacbf31 100644
--- a/llvm/include/llvm/CodeGen/RegisterScavenging.h
+++ b/llvm/include/llvm/CodeGen/RegisterScavenging.h
@@ -223,6 +223,7 @@ class RegScavenger {
   /// No more than InstrLimit instructions are inspected.
   Register findSurvivorReg(MachineBasicBlock::iterator StartMI,
                            BitVector &Candidates,
+                           ArrayRef<MCPhysReg> AllocationOrder,
                            unsigned InstrLimit,
                            MachineBasicBlock::iterator &UseMI);
 

diff  --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index a636da7325739..aa28198d94702 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -272,9 +272,18 @@ BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
 
 Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
                                        BitVector &Candidates,
+                                       ArrayRef<MCPhysReg> AllocationOrder,
                                        unsigned InstrLimit,
                                        MachineBasicBlock::iterator &UseMI) {
-  int Survivor = Candidates.find_first();
+  auto FindFirstCandidate = [&]() -> int {
+    for (MCPhysReg Reg : AllocationOrder) {
+      if (Candidates.test(Reg))
+        return Reg;
+    }
+    return -1;
+  };
+
+  int Survivor = FindFirstCandidate();
   assert(Survivor > 0 && "No candidates for scavenging");
 
   MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
@@ -322,7 +331,7 @@ Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
     if (Candidates.none())
       break;
 
-    Survivor = Candidates.find_first();
+    Survivor = FindFirstCandidate();
   }
   // If we ran off the end, that's where we want to restore.
   if (MI == ME) RestorePointMI = ME;
@@ -551,7 +560,8 @@ Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
 
   // Find the register whose use is furthest away.
   MachineBasicBlock::iterator UseMI;
-  Register SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+  Register SReg =
+      findSurvivorReg(I, Candidates, RC->getRawAllocationOrder(MF), 25, UseMI);
 
   // If we found an unused register there is no reason to spill it.
   if (!isRegUsed(SReg)) {

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
index ff4515e371434..61645d7738fec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -160,11 +160,11 @@ define amdgpu_kernel void @kernel_caller_byval() {
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v1, 0
 ; FLATSCR-NEXT:    s_mov_b32 s8, 0
+; FLATSCR-NEXT:    s_mov_b32 s12, 0
+; FLATSCR-NEXT:    s_mov_b32 s11, 0
 ; FLATSCR-NEXT:    s_mov_b32 s10, 0
 ; FLATSCR-NEXT:    s_mov_b32 s9, 0
-; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
-; FLATSCR-NEXT:    s_mov_b32 s11, 0
+; FLATSCR-NEXT:    s_mov_b32 s13, 0
 ; FLATSCR-NEXT:    s_mov_b32 s7, 0
 ; FLATSCR-NEXT:    s_mov_b32 s5, 0
 ; FLATSCR-NEXT:    s_mov_b32 s3, 0
@@ -174,11 +174,11 @@ define amdgpu_kernel void @kernel_caller_byval() {
 ; FLATSCR-NEXT:    s_mov_b32 s4, 0
 ; FLATSCR-NEXT:    s_mov_b32 s6, 0
 ; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s8 offset:8
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s10 offset:16
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s9 offset:24
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_lo offset:32
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], vcc_hi offset:40
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s11 offset:48
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s12 offset:16
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s11 offset:24
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s10 offset:32
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s9 offset:40
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s13 offset:48
 ; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s7 offset:56
 ; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s5 offset:64
 ; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s3 offset:72
@@ -188,11 +188,11 @@ define amdgpu_kernel void @kernel_caller_byval() {
 ; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s4 offset:104
 ; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s6 offset:112
 ; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s8 offset:120
-; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s10 offset:128
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s9 offset:8
+; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s12 offset:128
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s11 offset:8
 ; FLATSCR-NEXT:    s_nop 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[2:3], off, vcc_lo offset:16
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[4:5], off, vcc_hi offset:24
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[2:3], off, s10 offset:16
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[4:5], off, s9 offset:24
 ; FLATSCR-NEXT:    s_mov_b32 s37, 0
 ; FLATSCR-NEXT:    scratch_load_dwordx2 v[6:7], off, s37 offset:32
 ; FLATSCR-NEXT:    s_mov_b32 s36, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index e02eb3aee9d7b..411b599519def 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -279,8 +279,8 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x24
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s2, 0
+; GFX9-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-NEXT:    s_and_b32 s0, s0, 15
@@ -368,8 +368,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v1, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s0, 0
+; GFX9-NEXT:    scratch_load_dword v1, off, s0 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x104, v1
@@ -459,15 +459,15 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX9-NEXT:    scratch_load_dword v1, off, s32 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX9-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX9-NEXT:    s_add_i32 s1, s32, 0x100
 ; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX9-NEXT:    v_add_u32_e32 v1, vcc_lo, v1
+; GFX9-NEXT:    v_add_u32_e32 v1, s1, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x100
+; GFX9-NEXT:    s_add_i32 s0, s32, 0x100
 ; GFX9-NEXT:    scratch_store_dword v1, v2, off
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_u32_e32 v0, vcc_hi, v0
+; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
 ; GFX9-NEXT:    scratch_load_dword v0, v0, off glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -478,14 +478,14 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    s_add_i32 s1, s32, 0x100
 ; GFX10-NEXT:    s_add_i32 s0, s32, 0x100
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x100
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, s1, v0
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_add_nc_u32_e32 v1, vcc_lo, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, s0, v1
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
@@ -498,8 +498,8 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX940-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x100
-; GFX940-NEXT:    v_add_u32_e32 v1, vcc_hi, v1
+; GFX940-NEXT:    s_add_i32 s0, s32, 0x100
+; GFX940-NEXT:    v_add_u32_e32 v1, s0, v1
 ; GFX940-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
 ; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
@@ -515,10 +515,10 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0
 ; GFX11-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX11-NEXT:    s_add_i32 s0, s32, 0x100
 ; GFX11-NEXT:    scratch_load_b32 v3, off, s32 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_add_nc_u32_e32 v1, vcc_lo, v1
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, s0, v1
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-NEXT:    scratch_store_b32 v1, v2, off dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -544,8 +544,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x24
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s2, 0
+; GFX9-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-NEXT:    s_and_b32 s0, s0, 15
@@ -587,7 +587,6 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX940-NEXT:    scratch_load_dword v0, off, off offset:4 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
 ; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX940-NEXT:    s_and_b32 s0, s0, 15
@@ -596,14 +595,14 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_mov_b32_e32 v0, s0
-; GFX940-NEXT:    scratch_load_dword v0, v0, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_movk_i32 s0, 0x4004
+; GFX940-NEXT:    scratch_load_dword v0, v0, s0 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: store_load_sindex_large_offset_kernel:
 ; GFX11:       ; %bb.0: ; %bb
 ; GFX11-NEXT:    s_load_b32 s0, s[0:1], 0x24
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX11-NEXT:    scratch_load_b32 v2, off, off offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_and_b32 s1, s0, 15
@@ -614,7 +613,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX11-NEXT:    s_addk_i32 s0, 0x4004
 ; GFX11-NEXT:    scratch_store_b32 off, v0, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    scratch_load_b32 v0, v1, vcc_lo glc dlc
+; GFX11-NEXT:    s_movk_i32 s0, 0x4004
+; GFX11-NEXT:    scratch_load_b32 v0, v1, s0 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_endpgm
 bb:
@@ -635,8 +635,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v1, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s0, 0
+; GFX9-NEXT:    scratch_load_dword v1, off, s0 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x4004, v1
@@ -679,10 +679,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX940-NEXT:    v_mov_b32_e32 v3, 15
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
+; GFX940-NEXT:    s_movk_i32 s0, 0x4004
 ; GFX940-NEXT:    v_sub_u32_e32 v0, 0, v0
 ; GFX940-NEXT:    v_mov_b32_e32 v2, 0x4004
-; GFX940-NEXT:    scratch_store_dword v1, v3, vcc_hi sc0 sc1
+; GFX940-NEXT:    scratch_store_dword v1, v3, s0 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-NEXT:    v_mov_b32_e32 v1, 0x7c
@@ -695,13 +695,13 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX11:       ; %bb.0: ; %bb
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
 ; GFX11-NEXT:    v_dual_mov_b32 v3, 15 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX11-NEXT:    s_movk_i32 s0, 0x4004
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 0x7c :: v_dual_lshlrev_b32 v1, 2, v1
 ; GFX11-NEXT:    v_add3_u32 v1, 0x4004, v1, v2
 ; GFX11-NEXT:    scratch_load_b32 v2, off, off offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    scratch_store_b32 v0, v3, vcc_lo dlc
+; GFX11-NEXT:    scratch_store_b32 v0, v3, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    scratch_load_b32 v0, v1, off glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
@@ -728,15 +728,15 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX9-NEXT:    scratch_load_dword v1, off, s32 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX9-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
+; GFX9-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX9-NEXT:    v_add_u32_e32 v1, vcc_lo, v1
+; GFX9-NEXT:    v_add_u32_e32 v1, s1, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
+; GFX9-NEXT:    s_add_i32 s0, s32, 0x4004
 ; GFX9-NEXT:    scratch_store_dword v1, v2, off
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_u32_e32 v0, vcc_hi, v0
+; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
 ; GFX9-NEXT:    scratch_load_dword v0, v0, off glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -747,14 +747,14 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX10-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, s0, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, s1, v0
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_add_nc_u32_e32 v1, vcc_lo, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, s0, v1
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
@@ -767,15 +767,15 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX940-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX940-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX940-NEXT:    v_add_u32_e32 v1, vcc_lo, v1
+; GFX940-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX940-NEXT:    v_add_u32_e32 v1, s1, v1
 ; GFX940-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
 ; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX940-NEXT:    scratch_load_dword v0, v0, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX940-NEXT:    scratch_load_dword v0, v0, s0 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -785,16 +785,16 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0
 ; GFX11-NEXT:    v_and_b32_e32 v0, 15, v0
+; GFX11-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX11-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_add_nc_u32_e32 v1, s0, v1
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, s1, v1
 ; GFX11-NEXT:    scratch_load_b32 v3, off, s32 offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-NEXT:    scratch_store_b32 v1, v2, off dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    scratch_load_b32 v0, v0, vcc_lo glc dlc
+; GFX11-NEXT:    scratch_load_b32 v0, v0, s0 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 bb:
@@ -816,9 +816,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 13
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-NEXT:    s_mov_b32 s1, 0
 ; GFX9-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX9-NEXT:    scratch_store_dword off, v0, vcc_hi offset:4
+; GFX9-NEXT:    scratch_store_dword off, v0, s1 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX9-NEXT:    s_add_i32 s0, s0, 4
@@ -887,11 +887,11 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX9-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 4
+; GFX9-NEXT:    s_add_i32 s1, s32, 4
 ; GFX9-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
-; GFX9-NEXT:    s_add_i32 s0, s0, vcc_hi
+; GFX9-NEXT:    s_add_i32 s0, s0, s1
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
@@ -905,8 +905,8 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX10-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 4
-; GFX10-NEXT:    s_add_i32 s0, s0, vcc_lo
+; GFX10-NEXT:    s_add_i32 s1, s32, 4
+; GFX10-NEXT:    s_add_i32 s0, s0, s1
 ; GFX10-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_store_dword off, v1, s0

diff  --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index cde7716ab7c18..fc6b4b584e098 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -379,8 +379,8 @@ define void @no_new_vgpr_for_fp_csr() #1 {
 ; GCN-NEXT:     v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
 ; MUBUF-NEXT:   v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x2000{{$}}
 ; MUBUF-NEXT:   buffer_store_dword [[ZERO]], [[OFFSET]], s[0:3], s33 offen{{$}}
-; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x2000
-; FLATSCR-NEXT: scratch_store_dword off, [[ZERO]], vcc_hi
+; FLATSCR-NEXT: s_add_i32 s1, s33, 0x2000
+; FLATSCR-NEXT: scratch_store_dword off, [[ZERO]], s1
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:   s_add_i32 s32, s32, 0xffe80000
 ; FLATSCR-NEXT: s_addk_i32 s32, 0xa000

diff  --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index 35794c9737860..37a3fb547c63f 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -481,26 +481,26 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
 ; FLATSCR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v2, 0
+; FLATSCR-NEXT:    s_mov_b32 s6, 0
 ; FLATSCR-NEXT:    s_mov_b32 s5, 0
 ; FLATSCR-NEXT:    s_mov_b32 s4, 0
-; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLATSCR-NEXT:    global_load_ushort v0, v2, s[0:1]
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_store_short off, v0, s5 offset:4
+; FLATSCR-NEXT:    scratch_store_short off, v0, s6 offset:4
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    global_load_ushort v0, v2, s[0:1] offset:2
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_store_short off, v0, s4 offset:6
+; FLATSCR-NEXT:    scratch_store_short off, v0, s5 offset:6
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    global_load_ushort v0, v2, s[0:1] offset:4
+; FLATSCR-NEXT:    s_mov_b32 s1, 0
 ; FLATSCR-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_store_short off, v0, s0 offset:8
+; FLATSCR-NEXT:    scratch_store_short off, v0, s4 offset:8
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4
-; FLATSCR-NEXT:    scratch_load_dword v1, off, vcc_hi offset:6
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s1 offset:4
+; FLATSCR-NEXT:    scratch_load_dword v1, off, s0 offset:6
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
 ; FLATSCR-NEXT:    s_endpgm
@@ -544,28 +544,28 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i
 ; FLATSCR_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
 ; FLATSCR_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; FLATSCR_GFX10-NEXT:    v_mov_b32_e32 v2, 0
+; FLATSCR_GFX10-NEXT:    s_mov_b32 s6, 0
 ; FLATSCR_GFX10-NEXT:    s_mov_b32 s5, 0
 ; FLATSCR_GFX10-NEXT:    s_mov_b32 s4, 0
-; FLATSCR_GFX10-NEXT:    s_mov_b32 vcc_lo, 0
 ; FLATSCR_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLATSCR_GFX10-NEXT:    global_load_ushort v0, v2, s[0:1]
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s5 offset:4
+; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s6 offset:4
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; FLATSCR_GFX10-NEXT:    global_load_ushort v0, v2, s[0:1] offset:2
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s4 offset:6
+; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s5 offset:6
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; FLATSCR_GFX10-NEXT:    global_load_ushort v0, v2, s[0:1] offset:4
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
 ; FLATSCR_GFX10-NEXT:    s_mov_b32 s1, 0
 ; FLATSCR_GFX10-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s1 offset:8
+; FLATSCR_GFX10-NEXT:    scratch_store_short off, v0, s4 offset:8
 ; FLATSCR_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; FLATSCR_GFX10-NEXT:    s_clause 0x1
-; FLATSCR_GFX10-NEXT:    scratch_load_dword v0, off, s0 offset:4
-; FLATSCR_GFX10-NEXT:    scratch_load_dword v1, off, vcc_lo offset:6
+; FLATSCR_GFX10-NEXT:    scratch_load_dword v0, off, s1 offset:4
+; FLATSCR_GFX10-NEXT:    scratch_load_dword v1, off, s0 offset:6
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR_GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
 ; FLATSCR_GFX10-NEXT:    s_endpgm

diff  --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index f69602bdc39ba..5fea62ca5b375 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -42,16 +42,16 @@ define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
 ; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
 ; FLAT_SCR_OPT-NEXT:    v_mov_b32_e32 v0, 0
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 vcc_lo, 0
-; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v0, vcc_lo offset:4
+; FLAT_SCR_OPT-NEXT:    s_mov_b32 s0, 0
+; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v0, s0 offset:4
 ; FLAT_SCR_OPT-NEXT:    s_waitcnt_vscnt null, 0x0
 ; FLAT_SCR_OPT-NEXT:    s_endpgm
 ;
 ; FLAT_SCR_ARCH-LABEL: stack_object_in_kernel_no_calls:
 ; FLAT_SCR_ARCH:       ; %bb.0:
 ; FLAT_SCR_ARCH-NEXT:    v_mov_b32_e32 v0, 0
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 vcc_lo, 0
-; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v0, vcc_lo offset:4
+; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s0, 0
+; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v0, s0 offset:4
 ; FLAT_SCR_ARCH-NEXT:    s_waitcnt_vscnt null, 0x0
 ; FLAT_SCR_ARCH-NEXT:    s_endpgm
   %alloca = alloca i32, addrspace(5)

diff  --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 223cd4a691568..ce24e68f60b65 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -21,13 +21,13 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_mov_b32 s3, 0
+; GFX9-NEXT:    s_mov_b32 s2, 0
 ; GFX9-NEXT:    s_mov_b32 s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:52
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:36
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 offset:52
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:36
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:20
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:4
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: zero_init_kernel:
@@ -73,8 +73,6 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
 ; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
@@ -86,11 +84,13 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s2, 0
 ; GFX9-PAL-NEXT:    s_mov_b32 s1, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:52
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:36
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 offset:52
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:36
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:20
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:4
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
 ; GFX940-LABEL: zero_init_kernel:
@@ -119,7 +119,6 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
 ; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s3, s0
@@ -127,12 +126,13 @@ define amdgpu_kernel void @zero_init_kernel() {
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1010-PAL-NEXT:    s_mov_b32 s3, 0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s2, 0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s1, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:52
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:36
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:20
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:4
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 offset:52
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:36
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:20
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:4
 ; GFX1010-PAL-NEXT:    s_endpgm
 ;
 ; GFX1030-PAL-LABEL: zero_init_kernel:
@@ -903,13 +903,13 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_mov_b32 s3, 0
+; GFX9-NEXT:    s_mov_b32 s2, 0
 ; GFX9-NEXT:    s_mov_b32 s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:260
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:276
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 offset:260
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:276
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:292
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:308
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: zero_init_small_offset_kernel:
@@ -960,8 +960,6 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
 ; GFX9-PAL-NEXT:    s_mov_b32 s4, 0
 ; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
@@ -975,11 +973,13 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-PAL-NEXT:    s_mov_b32 s3, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s2, 0
 ; GFX9-PAL-NEXT:    s_mov_b32 s1, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:260
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:276
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 offset:260
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:276
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:292
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:308
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
 ; GFX940-LABEL: zero_init_small_offset_kernel:
@@ -1020,13 +1020,13 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1010-PAL-NEXT:    s_mov_b32 s3, 0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s2, 0
 ; GFX1010-PAL-NEXT:    s_mov_b32 s1, 0
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:260
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:276
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:292
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:308
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 offset:260
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:276
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:292
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:308
 ; GFX1010-PAL-NEXT:    s_endpgm
 ;
 ; GFX1030-PAL-LABEL: zero_init_small_offset_kernel:
@@ -1238,8 +1238,8 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x24
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s2, 0
+; GFX9-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-NEXT:    s_and_b32 s0, s0, 15
@@ -1298,13 +1298,13 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_getpc_b64 s[4:5]
 ; GFX9-PAL-NEXT:    s_mov_b32 s4, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s2, 0
 ; GFX9-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s5, s5, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s4, s3
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
-; GFX9-PAL-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-PAL-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-PAL-NEXT:    s_and_b32 s0, s0, 15
@@ -1348,8 +1348,8 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
 ; GFX1010-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4 glc dlc
+; GFX1010-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX1010-PAL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1426,8 +1426,8 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s1, 0
+; GFX9-NEXT:    scratch_load_dword v0, off, s1 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_lshl_b32 s0, s2, 2
 ; GFX9-NEXT:    s_addk_i32 s0, 0x104
@@ -1482,12 +1482,12 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
 ; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-PAL-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-PAL-NEXT:    s_and_b32 s0, s0, 15
@@ -1528,9 +1528,9 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) {
 ; GFX1010-PAL-NEXT:    s_addc_u32 s3, s3, 0
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s2, 0
 ; GFX1010-PAL-NEXT:    s_and_b32 s1, s0, 15
-; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4 glc dlc
+; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX1010-PAL-NEXT:    s_lshl_b32 s0, s0, 2
@@ -1601,8 +1601,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v1, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s0, 0
+; GFX9-NEXT:    scratch_load_dword v1, off, s0 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x104, v0
@@ -1653,14 +1653,14 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
 ; GFX9-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    scratch_load_dword v1, off, vcc_hi offset:4 glc
+; GFX9-PAL-NEXT:    scratch_load_dword v1, off, s0 offset:4 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    v_add_u32_e32 v1, 0x104, v0
 ; GFX9-PAL-NEXT:    v_sub_u32_e32 v0, 0x104, v0
@@ -1698,8 +1698,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
 ; GFX1010-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, 15
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_load_dword v3, off, vcc_lo offset:4 glc dlc
+; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX1010-PAL-NEXT:    scratch_load_dword v3, off, s0 offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1010-PAL-NEXT:    v_sub_nc_u32_e32 v1, 0x104, v0
 ; GFX1010-PAL-NEXT:    v_add_nc_u32_e32 v0, 0x104, v0
@@ -1768,8 +1768,8 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    scratch_load_dword v1, off, s32 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x100
-; GFX9-NEXT:    v_mov_b32_e32 v1, vcc_hi
+; GFX9-NEXT:    s_add_i32 s0, s32, 0x100
+; GFX9-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-NEXT:    v_lshl_add_u32 v2, v0, 2, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v3, 15
 ; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
@@ -1785,11 +1785,11 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-NEXT:    s_add_i32 s1, s32, 0x100
 ; GFX10-NEXT:    s_add_i32 s0, s32, 0x100
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x100
-; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
+; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
+; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, s0
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
@@ -1803,10 +1803,10 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX11-NEXT:    s_add_i32 s0, s32, 0x100
 ; GFX11-NEXT:    scratch_load_b32 v3, off, s32 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshl_add_u32 v0, v0, 2, vcc_lo
+; GFX11-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-NEXT:    scratch_store_b32 v0, v2, off dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -1819,8 +1819,8 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-PAL-NEXT:    scratch_load_dword v1, off, s32 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 0x100
-; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, vcc_hi
+; GFX9-PAL-NEXT:    s_add_i32 s0, s32, 0x100
+; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-PAL-NEXT:    v_lshl_add_u32 v2, v0, 2, v1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, 15
 ; GFX9-PAL-NEXT:    v_and_b32_e32 v0, 15, v0
@@ -1836,8 +1836,8 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX940-NEXT:    scratch_load_dword v1, off, s32 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x100
-; GFX940-NEXT:    v_mov_b32_e32 v1, vcc_hi
+; GFX940-NEXT:    s_add_i32 s0, s32, 0x100
+; GFX940-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX940-NEXT:    v_lshl_add_u32 v1, v0, 2, v1
 ; GFX940-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
@@ -1853,11 +1853,11 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-PAL-NEXT:    s_add_i32 s1, s32, 0x100
 ; GFX10-PAL-NEXT:    s_add_i32 s0, s32, 0x100
-; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x100
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, s1
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, s0
 ; GFX10-PAL-NEXT:    scratch_load_dword v3, off, s32 glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    scratch_store_dword v0, v2, off
@@ -1871,10 +1871,10 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x100
+; GFX11-PAL-NEXT:    s_add_i32 s0, s32, 0x100
 ; GFX11-PAL-NEXT:    scratch_load_b32 v3, off, s32 glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, vcc_lo
+; GFX11-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
 ; GFX11-PAL-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-PAL-NEXT:    scratch_store_b32 v0, v2, off dlc
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -1910,14 +1910,14 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_movk_i32 s3, 0x4004
+; GFX9-NEXT:    s_movk_i32 s2, 0x4004
 ; GFX9-NEXT:    s_movk_i32 s1, 0x4004
 ; GFX9-NEXT:    s_movk_i32 s0, 0x4004
-; GFX9-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX9-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s3
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:16
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:32
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: zero_init_large_offset_kernel:
@@ -1929,7 +1929,6 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX10-NEXT:    scratch_load_dword v0, off, off offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_mov_b32 s0, 0
-; GFX10-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX10-NEXT:    s_mov_b32 s1, s0
 ; GFX10-NEXT:    s_mov_b32 s2, s0
 ; GFX10-NEXT:    s_mov_b32 s3, s0
@@ -1937,13 +1936,14 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-NEXT:    s_movk_i32 s3, 0x4004
 ; GFX10-NEXT:    s_movk_i32 s2, 0x4004
 ; GFX10-NEXT:    s_movk_i32 s1, 0x4004
 ; GFX10-NEXT:    s_movk_i32 s0, 0x4004
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s3
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:16
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:32
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: zero_init_large_offset_kernel:
@@ -1951,20 +1951,21 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX11-NEXT:    scratch_load_b32 v0, off, off offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_mov_b32 s0, 0
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_mov_b32 s1, s0
 ; GFX11-NEXT:    s_mov_b32 s2, s0
 ; GFX11-NEXT:    s_mov_b32 s3, s0
 ; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX11-NEXT:    s_movk_i32 s3, 0x4004
 ; GFX11-NEXT:    s_movk_i32 s2, 0x4004
 ; GFX11-NEXT:    s_movk_i32 s1, 0x4004
 ; GFX11-NEXT:    s_movk_i32 s0, 0x4004
 ; GFX11-NEXT:    s_clause 0x3
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s2
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s3
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s2 offset:16
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:32
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:48
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
 ;
@@ -1975,8 +1976,6 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
 ; GFX9-PAL-NEXT:    s_mov_b32 s4, 0
 ; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX9-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX9-PAL-NEXT:    s_movk_i32 vcc_hi, 0x4004
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
@@ -1990,12 +1989,14 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-PAL-NEXT:    s_movk_i32 s3, 0x4004
+; GFX9-PAL-NEXT:    s_movk_i32 s2, 0x4004
 ; GFX9-PAL-NEXT:    s_movk_i32 s1, 0x4004
 ; GFX9-PAL-NEXT:    s_movk_i32 s0, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s3
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:16
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:32
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
 ; GFX940-LABEL: zero_init_large_offset_kernel:
@@ -2008,14 +2009,14 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX940-NEXT:    s_mov_b32 s3, s0
 ; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
 ; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    s_movk_i32 s3, 0x4004
+; GFX940-NEXT:    s_movk_i32 s2, 0x4004
 ; GFX940-NEXT:    s_movk_i32 s1, 0x4004
 ; GFX940-NEXT:    s_movk_i32 s0, 0x4004
-; GFX940-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 sc0 sc1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 sc0 sc1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:16 sc0 sc1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:32 sc0 sc1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1
 ; GFX940-NEXT:    s_endpgm
 ;
 ; GFX1010-PAL-LABEL: zero_init_large_offset_kernel:
@@ -2040,14 +2041,14 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1010-PAL-NEXT:    s_movk_i32 s3, 0x4004
 ; GFX1010-PAL-NEXT:    s_movk_i32 s2, 0x4004
 ; GFX1010-PAL-NEXT:    s_movk_i32 s1, 0x4004
 ; GFX1010-PAL-NEXT:    s_movk_i32 s0, 0x4004
-; GFX1010-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
-; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s3
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:16
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:32
+; GFX1010-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48
 ; GFX1010-PAL-NEXT:    s_endpgm
 ;
 ; GFX1030-PAL-LABEL: zero_init_large_offset_kernel:
@@ -2064,7 +2065,6 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX1030-PAL-NEXT:    scratch_load_dword v0, off, off offset:4 glc dlc
 ; GFX1030-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1030-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX1030-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
 ; GFX1030-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX1030-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX1030-PAL-NEXT:    s_mov_b32 s3, s0
@@ -2072,13 +2072,14 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1030-PAL-NEXT:    s_movk_i32 s3, 0x4004
 ; GFX1030-PAL-NEXT:    s_movk_i32 s2, 0x4004
 ; GFX1030-PAL-NEXT:    s_movk_i32 s1, 0x4004
 ; GFX1030-PAL-NEXT:    s_movk_i32 s0, 0x4004
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
-; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
+; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s3
+; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:16
+; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:32
+; GFX1030-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48
 ; GFX1030-PAL-NEXT:    s_endpgm
 ;
 ; GFX11-PAL-LABEL: zero_init_large_offset_kernel:
@@ -2086,20 +2087,21 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
 ; GFX11-PAL-NEXT:    scratch_load_b32 v0, off, off offset:4 glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-PAL-NEXT:    s_mov_b32 s0, 0
-; GFX11-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX11-PAL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-PAL-NEXT:    s_mov_b32 s1, s0
 ; GFX11-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX11-PAL-NEXT:    s_mov_b32 s3, s0
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX11-PAL-NEXT:    s_movk_i32 s3, 0x4004
 ; GFX11-PAL-NEXT:    s_movk_i32 s2, 0x4004
 ; GFX11-PAL-NEXT:    s_movk_i32 s1, 0x4004
 ; GFX11-PAL-NEXT:    s_movk_i32 s0, 0x4004
 ; GFX11-PAL-NEXT:    s_clause 0x3
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s2
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s3
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s2 offset:16
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:32
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:48
 ; GFX11-PAL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-PAL-NEXT:    s_endpgm
   %padding = alloca [4096 x i32], align 4, addrspace(5)
@@ -2124,14 +2126,14 @@ define void @zero_init_large_offset_foo() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-NEXT:    s_add_i32 s3, s32, 0x4004
+; GFX9-NEXT:    s_add_i32 s2, s32, 0x4004
 ; GFX9-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX9-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX9-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s3
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:16
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:32
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2149,14 +2151,14 @@ define void @zero_init_large_offset_foo() {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-NEXT:    s_add_i32 s3, s32, 0x4004
 ; GFX10-NEXT:    s_add_i32 s2, s32, 0x4004
 ; GFX10-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX10-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
-; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s3
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:16
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:32
+; GFX10-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2173,15 +2175,15 @@ define void @zero_init_large_offset_foo() {
 ; GFX11-NEXT:    s_mov_b32 s3, s0
 ; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX11-NEXT:    s_add_i32 s3, s32, 0x4004
 ; GFX11-NEXT:    s_add_i32 s2, s32, 0x4004
 ; GFX11-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX11-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX11-NEXT:    s_clause 0x3
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s2
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
-; GFX11-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s3
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s2 offset:16
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:32
+; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:48
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2198,14 +2200,14 @@ define void @zero_init_large_offset_foo() {
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX9-PAL-NEXT:    s_add_i32 s3, s32, 0x4004
+; GFX9-PAL-NEXT:    s_add_i32 s2, s32, 0x4004
 ; GFX9-PAL-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX9-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX9-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s3
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:16
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:32
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2220,14 +2222,14 @@ define void @zero_init_large_offset_foo() {
 ; GFX940-NEXT:    s_mov_b32 s3, s0
 ; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
 ; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NEXT:    s_add_i32 s3, s32, 0x4004
+; GFX940-NEXT:    s_add_i32 s2, s32, 0x4004
 ; GFX940-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX940-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 sc0 sc1
-; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 sc0 sc1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s3 sc0 sc1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:16 sc0 sc1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:32 sc0 sc1
+; GFX940-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2245,14 +2247,14 @@ define void @zero_init_large_offset_foo() {
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX10-PAL-NEXT:    s_add_i32 s3, s32, 0x4004
 ; GFX10-PAL-NEXT:    s_add_i32 s2, s32, 0x4004
 ; GFX10-PAL-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX10-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2
-; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:16
-; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:32
-; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s3
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s2 offset:16
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:32
+; GFX10-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s0 offset:48
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2269,15 +2271,15 @@ define void @zero_init_large_offset_foo() {
 ; GFX11-PAL-NEXT:    s_mov_b32 s3, s0
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX11-PAL-NEXT:    s_add_i32 s3, s32, 0x4004
 ; GFX11-PAL-NEXT:    s_add_i32 s2, s32, 0x4004
 ; GFX11-PAL-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX11-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
 ; GFX11-PAL-NEXT:    s_clause 0x3
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s2
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:16
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:32
-; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], vcc_lo offset:48
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s3
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s2 offset:16
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s1 offset:32
+; GFX11-PAL-NEXT:    scratch_store_b128 off, v[0:3], s0 offset:48
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    s_setpc_b64 s[30:31]
   %padding = alloca [4096 x i32], align 4, addrspace(5)
@@ -2294,8 +2296,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x24
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s2, 0
+; GFX9-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-NEXT:    s_and_b32 s0, s0, 15
@@ -2354,13 +2356,13 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_getpc_b64 s[4:5]
 ; GFX9-PAL-NEXT:    s_mov_b32 s4, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s2, 0
 ; GFX9-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s5, s5, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s4, s3
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
-; GFX9-PAL-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-PAL-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-PAL-NEXT:    s_and_b32 s0, s0, 15
@@ -2404,8 +2406,8 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
 ; GFX1010-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4 glc dlc
+; GFX1010-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX1010-PAL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2482,8 +2484,8 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s1, 0
+; GFX9-NEXT:    scratch_load_dword v0, off, s1 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_lshl_b32 s0, s2, 2
 ; GFX9-NEXT:    s_addk_i32 s0, 0x4004
@@ -2538,12 +2540,12 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
 ; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    scratch_load_dword v0, off, vcc_hi offset:4 glc
+; GFX9-PAL-NEXT:    s_mov_b32 s2, 0
+; GFX9-PAL-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-PAL-NEXT:    s_and_b32 s0, s0, 15
@@ -2584,9 +2586,9 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) {
 ; GFX1010-PAL-NEXT:    s_addc_u32 s3, s3, 0
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s2, 0
 ; GFX1010-PAL-NEXT:    s_and_b32 s1, s0, 15
-; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4 glc dlc
+; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, s2 offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX1010-PAL-NEXT:    s_lshl_b32 s0, s0, 2
@@ -2657,8 +2659,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dword v1, off, vcc_hi offset:4 glc
+; GFX9-NEXT:    s_mov_b32 s0, 0
+; GFX9-NEXT:    scratch_load_dword v1, off, s0 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x4004, v0
@@ -2693,13 +2695,13 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX11-LABEL: store_load_vindex_large_offset_kernel:
 ; GFX11:       ; %bb.0: ; %bb
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX11-NEXT:    s_movk_i32 s0, 0x4004
 ; GFX11-NEXT:    scratch_load_b32 v3, off, off offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 0x4004, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_add_nc_u32 v1, 0x7c, v1
-; GFX11-NEXT:    scratch_store_b32 v0, v2, vcc_lo dlc
+; GFX11-NEXT:    scratch_store_b32 v0, v2, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    scratch_load_b32 v0, v1, off glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
@@ -2710,14 +2712,14 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
 ; GFX9-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    scratch_load_dword v1, off, vcc_hi offset:4 glc
+; GFX9-PAL-NEXT:    scratch_load_dword v1, off, s0 offset:4 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    v_add_u32_e32 v1, 0x4004, v0
 ; GFX9-PAL-NEXT:    v_sub_u32_e32 v0, 0x4004, v0
@@ -2734,8 +2736,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX940-NEXT:    v_mov_b32_e32 v1, 15
-; GFX940-NEXT:    s_movk_i32 vcc_hi, 0x4004
-; GFX940-NEXT:    scratch_store_dword v0, v1, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_movk_i32 s0, 0x4004
+; GFX940-NEXT:    scratch_store_dword v0, v1, s0 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_sub_u32_e32 v0, 0x4004, v0
 ; GFX940-NEXT:    v_add_u32_e32 v0, 0x7c, v0
@@ -2756,8 +2758,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
 ; GFX1010-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v2, 15
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX1010-PAL-NEXT:    scratch_load_dword v3, off, vcc_lo offset:4 glc dlc
+; GFX1010-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX1010-PAL-NEXT:    scratch_load_dword v3, off, s0 offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1010-PAL-NEXT:    v_sub_nc_u32_e32 v1, 0x4004, v0
 ; GFX1010-PAL-NEXT:    v_add_nc_u32_e32 v0, 0x4004, v0
@@ -2795,13 +2797,13 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
 ; GFX11-PAL-LABEL: store_load_vindex_large_offset_kernel:
 ; GFX11-PAL:       ; %bb.0: ; %bb
 ; GFX11-PAL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-PAL-NEXT:    s_movk_i32 vcc_lo, 0x4004
+; GFX11-PAL-NEXT:    s_movk_i32 s0, 0x4004
 ; GFX11-PAL-NEXT:    scratch_load_b32 v3, off, off offset:4 glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-PAL-NEXT:    v_sub_nc_u32_e32 v1, 0x4004, v0
 ; GFX11-PAL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_add_nc_u32 v1, 0x7c, v1
-; GFX11-PAL-NEXT:    scratch_store_b32 v0, v2, vcc_lo dlc
+; GFX11-PAL-NEXT:    scratch_store_b32 v0, v2, s0 dlc
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    scratch_load_b32 v0, v1, off glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
@@ -2827,8 +2829,8 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    scratch_load_dword v1, off, s32 offset:4 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-NEXT:    v_mov_b32_e32 v1, vcc_hi
+; GFX9-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX9-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-NEXT:    v_lshl_add_u32 v2, v0, 2, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v3, 15
 ; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
@@ -2844,11 +2846,11 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX10-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
+; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
+; GFX10-NEXT:    v_lshl_add_u32 v1, v1, 2, s0
 ; GFX10-NEXT:    scratch_load_dword v3, off, s32 offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    scratch_store_dword v0, v2, off
@@ -2862,16 +2864,16 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
+; GFX11-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX11-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX11-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
+; GFX11-NEXT:    v_lshl_add_u32 v0, v0, 2, s1
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-NEXT:    scratch_load_b32 v3, off, s32 offset:4 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    scratch_store_b32 v0, v2, off dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    scratch_load_b32 v0, v1, vcc_lo glc dlc
+; GFX11-NEXT:    scratch_load_b32 v0, v1, s0 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2880,8 +2882,8 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-PAL-NEXT:    scratch_load_dword v1, off, s32 offset:4 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, vcc_hi
+; GFX9-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-PAL-NEXT:    v_lshl_add_u32 v2, v0, 2, v1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v3, 15
 ; GFX9-PAL-NEXT:    v_and_b32_e32 v0, 15, v0
@@ -2897,16 +2899,16 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX940-NEXT:    scratch_load_dword v1, off, s32 offset:4 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX940-NEXT:    v_mov_b32_e32 v1, vcc_lo
+; GFX940-NEXT:    s_add_i32 s1, s32, 0x4004
+; GFX940-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX940-NEXT:    v_lshl_add_u32 v1, v0, 2, v1
 ; GFX940-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX940-NEXT:    v_and_b32_e32 v0, 15, v0
 ; GFX940-NEXT:    scratch_store_dword v1, v2, off sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 0x4004
-; GFX940-NEXT:    scratch_load_dword v0, v0, vcc_hi sc0 sc1
+; GFX940-NEXT:    s_add_i32 s0, s32, 0x4004
+; GFX940-NEXT:    scratch_load_dword v0, v0, s0 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2915,11 +2917,11 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    v_and_b32_e32 v1, 15, v0
+; GFX10-PAL-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX10-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, s1
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, vcc_lo
+; GFX10-PAL-NEXT:    v_lshl_add_u32 v1, v1, 2, s0
 ; GFX10-PAL-NEXT:    scratch_load_dword v3, off, s32 offset:4 glc dlc
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-PAL-NEXT:    scratch_store_dword v0, v2, off
@@ -2933,16 +2935,16 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0
+; GFX11-PAL-NEXT:    s_add_i32 s1, s32, 0x4004
 ; GFX11-PAL-NEXT:    s_add_i32 s0, s32, 0x4004
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 0x4004
-; GFX11-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
+; GFX11-PAL-NEXT:    v_lshl_add_u32 v0, v0, 2, s1
 ; GFX11-PAL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-PAL-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 ; GFX11-PAL-NEXT:    scratch_load_b32 v3, off, s32 offset:4 glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-PAL-NEXT:    scratch_store_b32 v0, v2, off dlc
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-PAL-NEXT:    scratch_load_b32 v0, v1, vcc_lo glc dlc
+; GFX11-PAL-NEXT:    scratch_load_b32 v0, v1, s0 glc dlc
 ; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-PAL-NEXT:    s_setpc_b64 s[30:31]
 bb:
@@ -2964,9 +2966,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 13
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
+; GFX9-NEXT:    s_mov_b32 s1, 0
 ; GFX9-NEXT:    s_movk_i32 s0, 0x3000
-; GFX9-NEXT:    scratch_store_dword off, v0, vcc_hi offset:4
+; GFX9-NEXT:    scratch_store_dword off, v0, s1 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_add_i32 s0, s0, 4
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
@@ -3014,13 +3016,13 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_mov_b32 s2, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 13
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
 ; GFX9-PAL-NEXT:    s_movk_i32 s0, 0x3000
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s1
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    scratch_store_dword off, v0, vcc_hi offset:4
+; GFX9-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-PAL-NEXT:    scratch_store_dword off, v0, s1 offset:4
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_add_i32 s0, s0, 4
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 15
@@ -3058,9 +3060,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX1010-PAL-NEXT:    s_movk_i32 s0, 0x3800
-; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
+; GFX1010-PAL-NEXT:    s_mov_b32 s1, 0
 ; GFX1010-PAL-NEXT:    s_add_i32 s0, s0, 4
-; GFX1010-PAL-NEXT:    scratch_store_dword off, v0, vcc_lo offset:4
+; GFX1010-PAL-NEXT:    scratch_store_dword off, v0, s1 offset:4
 ; GFX1010-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX1010-PAL-NEXT:    scratch_store_dword off, v1, s0 offset:1664
 ; GFX1010-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -3121,10 +3123,10 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX9-NEXT:    s_movk_i32 s0, 0x3000
-; GFX9-NEXT:    s_add_i32 vcc_hi, s32, 4
+; GFX9-NEXT:    s_add_i32 s1, s32, 4
 ; GFX9-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_add_i32 s0, s0, vcc_hi
+; GFX9-NEXT:    s_add_i32 s0, s0, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0 offset:3712
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -3139,8 +3141,8 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX10-NEXT:    s_movk_i32 s0, 0x3800
-; GFX10-NEXT:    s_add_i32 vcc_lo, s32, 4
-; GFX10-NEXT:    s_add_i32 s0, s0, vcc_lo
+; GFX10-NEXT:    s_add_i32 s1, s32, 4
+; GFX10-NEXT:    s_add_i32 s0, s0, s1
 ; GFX10-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_store_dword off, v1, s0 offset:1664
@@ -3155,9 +3157,9 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
 ; GFX11-NEXT:    s_movk_i32 s0, 0x3000
-; GFX11-NEXT:    s_add_i32 vcc_lo, s32, 4
+; GFX11-NEXT:    s_add_i32 s1, s32, 4
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    s_add_i32 s0, s0, vcc_lo
+; GFX11-NEXT:    s_add_i32 s0, s0, s1
 ; GFX11-NEXT:    scratch_store_b32 off, v0, s32 offset:4 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    scratch_store_b32 off, v1, s0 offset:3712 dlc
@@ -3171,10 +3173,10 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX9-PAL-NEXT:    s_movk_i32 s0, 0x3000
-; GFX9-PAL-NEXT:    s_add_i32 vcc_hi, s32, 4
+; GFX9-PAL-NEXT:    s_add_i32 s1, s32, 4
 ; GFX9-PAL-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    s_add_i32 s0, s0, vcc_hi
+; GFX9-PAL-NEXT:    s_add_i32 s0, s0, s1
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX9-PAL-NEXT:    scratch_store_dword off, v0, s0 offset:3712
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
@@ -3187,10 +3189,10 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX940-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX940-NEXT:    s_movk_i32 s0, 0x3000
-; GFX940-NEXT:    s_add_i32 vcc_hi, s32, 4
+; GFX940-NEXT:    s_add_i32 s1, s32, 4
 ; GFX940-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_add_i32 s0, s0, vcc_hi
+; GFX940-NEXT:    s_add_i32 s0, s0, s1
 ; GFX940-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX940-NEXT:    scratch_store_dword off, v0, s0 offset:3712 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
@@ -3205,8 +3207,8 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX10-PAL-NEXT:    s_movk_i32 s0, 0x3800
-; GFX10-PAL-NEXT:    s_add_i32 vcc_lo, s32, 4
-; GFX10-PAL-NEXT:    s_add_i32 s0, s0, vcc_lo
+; GFX10-PAL-NEXT:    s_add_i32 s1, s32, 4
+; GFX10-PAL-NEXT:    s_add_i32 s0, s0, s1
 ; GFX10-PAL-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    scratch_store_dword off, v1, s0 offset:1664
@@ -3221,9 +3223,9 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
 ; GFX11-PAL-NEXT:    s_movk_i32 s0, 0x3000
-; GFX11-PAL-NEXT:    s_add_i32 vcc_lo, s32, 4
+; GFX11-PAL-NEXT:    s_add_i32 s1, s32, 4
 ; GFX11-PAL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-PAL-NEXT:    s_add_i32 s0, s0, vcc_lo
+; GFX11-PAL-NEXT:    s_add_i32 s0, s0, s1
 ; GFX11-PAL-NEXT:    scratch_store_b32 off, v0, s32 offset:4 dlc
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    scratch_store_b32 off, v1, s0 offset:3712 dlc
@@ -3917,11 +3919,11 @@ define amdgpu_ps void @large_offset() {
 ; GFX9-NEXT:    v_mov_b32_e32 v1, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v3, v0
-; GFX9-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:3024
+; GFX9-NEXT:    s_mov_b32 s1, 0
+; GFX9-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:3024
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-NEXT:    scratch_load_dwordx4 v[0:3], off, vcc_hi offset:3024 glc
+; GFX9-NEXT:    s_mov_b32 s0, 0
+; GFX9-NEXT:    scratch_load_dwordx4 v[0:3], off, s0 offset:3024 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 16
 ; GFX9-NEXT:    ;;#ASMSTART
@@ -3992,11 +3994,11 @@ define amdgpu_ps void @large_offset() {
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s0
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], vcc_lo offset:3024
+; GFX9-PAL-NEXT:    s_mov_b32 s1, 0
+; GFX9-PAL-NEXT:    scratch_store_dwordx4 off, v[0:3], s1 offset:3024
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    scratch_load_dwordx4 v[0:3], off, vcc_hi offset:3024 glc
+; GFX9-PAL-NEXT:    s_mov_b32 s0, 0
+; GFX9-PAL-NEXT:    scratch_load_dwordx4 v[0:3], off, s0 offset:3024 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 16
 ; GFX9-PAL-NEXT:    ;;#ASMSTART

diff  --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index 28169c947f08d..eebd326d8e421 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -53,8 +53,8 @@ body:             |
     ; GCN-LABEL: name: func_add_constant_to_fi_uniform_i32
     ; GCN: liveins: $sgpr30_sgpr31
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $vcc_hi = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc
-    ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 killed $vcc_hi, 4, implicit-def dead $scc
+    ; GCN-NEXT: $sgpr0 = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc
+    ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 killed $sgpr0, 4, implicit-def dead $scc
     ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr4, implicit $exec
     ; GCN-NEXT: $m0 = S_MOV_B32 -1
     ; GCN-NEXT: DS_WRITE_B32 undef renamable $vgpr0, killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec
@@ -89,12 +89,12 @@ body:             |
     ; GCN-LABEL: name: func_add_constant_to_fi_uniform_SCC_clobber_i32
     ; GCN: liveins: $sgpr30_sgpr31
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $vcc_lo = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc
-    ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $vcc_lo, 4, implicit-def $scc
+    ; GCN-NEXT: $sgpr1 = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc
+    ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $sgpr1, 4, implicit-def $scc
     ; GCN-NEXT: renamable $sgpr5 = S_ADDC_U32 $sgpr4, 1234567, implicit-def $scc, implicit $scc
-    ; GCN-NEXT: $vcc_hi = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; GCN-NEXT: $vcc_hi = S_ADD_I32 killed $vcc_hi, 8, implicit-def $scc
-    ; GCN-NEXT: renamable $sgpr6 = S_MUL_I32 killed $vcc_hi, $sgpr5
+    ; GCN-NEXT: $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; GCN-NEXT: $sgpr0 = S_ADD_I32 killed $sgpr0, 8, implicit-def $scc
+    ; GCN-NEXT: renamable $sgpr6 = S_MUL_I32 killed $sgpr0, $sgpr5
     ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr6, implicit $exec
     ; GCN-NEXT: $m0 = S_MOV_B32 -1
     ; GCN-NEXT: DS_WRITE_B32 undef renamable $vgpr0, killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec
@@ -132,9 +132,9 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 7, implicit $exec
     ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec
-    ; GCN-NEXT: $vcc_hi = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; GCN-NEXT: $vcc_hi = S_ADD_I32 killed $vcc_hi, 512, implicit-def $scc
-    ; GCN-NEXT: renamable $sgpr4 = S_MUL_I32 killed $vcc_hi, 9
+    ; GCN-NEXT: $sgpr5 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; GCN-NEXT: $sgpr5 = S_ADD_I32 killed $sgpr5, 512, implicit-def $scc
+    ; GCN-NEXT: renamable $sgpr4 = S_MUL_I32 killed $sgpr5, 9
     ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr4, implicit $exec
     ; GCN-NEXT: $m0 = S_MOV_B32 -1
     ; GCN-NEXT: DS_WRITE_B32 undef renamable $vgpr0, killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec
@@ -168,8 +168,8 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr10, 4, implicit-def $scc
     ; GCN-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
-    ; GCN-NEXT: $vcc_hi = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
-    ; GCN-NEXT: renamable $sgpr5 = S_MUL_I32 killed $vcc_hi, $sgpr4
+    ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+    ; GCN-NEXT: renamable $sgpr5 = S_MUL_I32 killed $sgpr0, $sgpr4
     ; GCN-NEXT: renamable $sgpr6 = S_ADDC_U32 $sgpr5, 1234567, implicit-def dead $scc, implicit $scc
     ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr6, implicit $exec
     ; GCN-NEXT: $m0 = S_MOV_B32 -1

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index 7f481b2a134e2..a303ee4a4cc08 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -6939,12 +6939,12 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st
 ; GFX11-NEXT:    s_getpc_b64 s[0:1]
 ; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 at rel32@lo+4
 ; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 at rel32@hi+12
-; GFX11-NEXT:    s_add_i32 vcc_lo, s33, 8
+; GFX11-NEXT:    s_add_i32 s2, s33, 8
 ; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
 ; GFX11-NEXT:    s_clause 0x1
 ; GFX11-NEXT:    scratch_store_b8 off, v0, s33
 ; GFX11-NEXT:    scratch_store_b32 off, v1, s33 offset:4
-; GFX11-NEXT:    v_dual_mov_b32 v0, vcc_lo :: v_dual_mov_b32 v1, s33
+; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s33
 ; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 ; GFX11-NEXT:    s_clause 0x1
@@ -6986,11 +6986,11 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st
 ; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
 ; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 at rel32@lo+4
 ; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 at rel32@hi+12
-; GFX10-SCRATCH-NEXT:    s_add_i32 vcc_lo, s33, 8
+; GFX10-SCRATCH-NEXT:    s_add_i32 s2, s33, 8
 ; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
 ; GFX10-SCRATCH-NEXT:    scratch_store_byte off, v0, s33
 ; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v1, s33 offset:4
-; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, vcc_lo
+; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, s33
 ; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
 ; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]

diff  --git a/llvm/test/CodeGen/AMDGPU/load-lo16.ll b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
index 8f01d1db64cc4..8bf28868156ea 100644
--- a/llvm/test/CodeGen/AMDGPU/load-lo16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
@@ -2038,8 +2038,8 @@ define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
 ; GFX900-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX900-FLATSCR-NEXT:    scratch_store_dword off, v1, s32 offset:4
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT:    s_add_i32 vcc_hi, s32, 44
-; GFX900-FLATSCR-NEXT:    scratch_load_short_d16 v0, off, vcc_hi offset:4054 glc
+; GFX900-FLATSCR-NEXT:    s_add_i32 s0, s32, 44
+; GFX900-FLATSCR-NEXT:    scratch_load_short_d16 v0, off, s0 offset:4054 glc
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-FLATSCR-NEXT:    global_store_dword v[0:1], v0, off
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
@@ -2106,8 +2106,8 @@ define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
 ; GFX900-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX900-FLATSCR-NEXT:    scratch_store_dword off, v1, s32 offset:4
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT:    s_add_i32 vcc_hi, s32, 44
-; GFX900-FLATSCR-NEXT:    scratch_load_sbyte_d16 v0, off, vcc_hi offset:4055 glc
+; GFX900-FLATSCR-NEXT:    s_add_i32 s0, s32, 44
+; GFX900-FLATSCR-NEXT:    scratch_load_sbyte_d16 v0, off, s0 offset:4055 glc
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-FLATSCR-NEXT:    global_store_dword v[0:1], v0, off
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
@@ -2175,8 +2175,8 @@ define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
 ; GFX900-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX900-FLATSCR-NEXT:    scratch_store_dword off, v1, s32 offset:4
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT:    s_add_i32 vcc_hi, s32, 44
-; GFX900-FLATSCR-NEXT:    scratch_load_ubyte_d16 v0, off, vcc_hi offset:4055 glc
+; GFX900-FLATSCR-NEXT:    s_add_i32 s0, s32, 44
+; GFX900-FLATSCR-NEXT:    scratch_load_ubyte_d16 v0, off, s0 offset:4055 glc
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-FLATSCR-NEXT:    global_store_dword v[0:1], v0, off
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
@@ -2244,8 +2244,8 @@ define void @load_private_lo_v2f16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
 ; GFX900-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX900-FLATSCR-NEXT:    scratch_store_dword off, v1, s32 offset:4
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT:    s_add_i32 vcc_hi, s32, 44
-; GFX900-FLATSCR-NEXT:    scratch_load_sbyte_d16 v0, off, vcc_hi offset:4055 glc
+; GFX900-FLATSCR-NEXT:    s_add_i32 s0, s32, 44
+; GFX900-FLATSCR-NEXT:    scratch_load_sbyte_d16 v0, off, s0 offset:4055 glc
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-FLATSCR-NEXT:    global_store_dword v[0:1], v0, off
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
@@ -2314,8 +2314,8 @@ define void @load_private_lo_v2f16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
 ; GFX900-FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX900-FLATSCR-NEXT:    scratch_store_dword off, v1, s32 offset:4
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT:    s_add_i32 vcc_hi, s32, 44
-; GFX900-FLATSCR-NEXT:    scratch_load_ubyte_d16 v0, off, vcc_hi offset:4055 glc
+; GFX900-FLATSCR-NEXT:    s_add_i32 s0, s32, 44
+; GFX900-FLATSCR-NEXT:    scratch_load_ubyte_d16 v0, off, s0 offset:4055 glc
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-FLATSCR-NEXT:    global_store_dword v[0:1], v0, off
 ; GFX900-FLATSCR-NEXT:    s_waitcnt vmcnt(0)

diff  --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index df126d5c3b64b..72a86c24235f4 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -61,9 +61,9 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(ptr addrspace(1) %out) {
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
-; FLATSCR-NEXT:    s_movk_i32 vcc_hi, 0x2000
+; FLATSCR-NEXT:    s_movk_i32 s3, 0x2000
 ; FLATSCR-NEXT:    s_mov_b32 s2, 0
-; FLATSCR-NEXT:    scratch_store_dword off, v0, vcc_hi
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s3
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:  .LBB0_1: ; %loadstoreloop
 ; FLATSCR-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -155,14 +155,14 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) {
 ; FLATSCR-NEXT:    s_and_b32 s33, s33, 0xffffe000
 ; FLATSCR-NEXT:    s_add_i32 s32, s32, 0x8000
 ; FLATSCR-NEXT:    v_mov_b32_e32 v2, 0
-; FLATSCR-NEXT:    s_add_i32 vcc_hi, s33, 0x2000
+; FLATSCR-NEXT:    s_add_i32 s1, s33, 0x2000
 ; FLATSCR-NEXT:    s_mov_b32 s0, 0
-; FLATSCR-NEXT:    scratch_store_dword off, v2, vcc_hi
+; FLATSCR-NEXT:    scratch_store_dword off, v2, s1
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:  .LBB1_1: ; %loadstoreloop
 ; FLATSCR-NEXT:    ; =>This Inner Loop Header: Depth=1
-; FLATSCR-NEXT:    s_add_i32 vcc_hi, s33, 0x3000
-; FLATSCR-NEXT:    s_add_i32 s1, s0, vcc_hi
+; FLATSCR-NEXT:    s_add_i32 s3, s33, 0x3000
+; FLATSCR-NEXT:    s_add_i32 s1, s0, s3
 ; FLATSCR-NEXT:    s_add_i32 s0, s0, 1
 ; FLATSCR-NEXT:    s_cmpk_lt_u32 s0, 0x2120
 ; FLATSCR-NEXT:    scratch_store_byte off, v2, s1
@@ -275,9 +275,9 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    s_mov_b32 s3, 0
 ; FLATSCR-NEXT:    s_mov_b32 s2, 0
-; FLATSCR-NEXT:    scratch_store_dword off, v0, vcc_hi offset:1024
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s3 offset:1024
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:  .LBB2_1: ; %loadstoreloop
 ; FLATSCR-NEXT:    ; =>This Inner Loop Header: Depth=1

diff  --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
index 5212187b17216..4a69e7ac04a09 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
@@ -87,10 +87,10 @@ body:             |
     ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
     ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: $vcc_lo = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
-    ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec
-    ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
-    ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec
+    ; GFX9-FLATSCR-NEXT: $sgpr9 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
+    ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr9, implicit $exec
+    ; GFX9-FLATSCR-NEXT: $sgpr8 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
+    ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr8, $vgpr1, implicit $exec
     ; GFX9-FLATSCR-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
     ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc

diff  --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
index cddf456777d51..78880428278cd 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -34,7 +34,7 @@
 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0
 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0
 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
@@ -102,7 +102,7 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0
 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0
 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
@@ -155,7 +155,7 @@ define amdgpu_vs float @vs_main(i32 %idx) {
 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x10
 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0
 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
@@ -237,7 +237,7 @@ define amdgpu_hs float @hs_main(i32 %idx) {
 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0
 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
@@ -293,7 +293,7 @@ define amdgpu_gs float @gs_main(i32 %idx) {
 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0
 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
@@ -316,7 +316,6 @@ define amdgpu_gs float @gs_main(i32 %idx) {
 ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
 ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
 
-; GFX9PLUS-NOT: s_mov_b32 s5
 ; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
 ; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
 
@@ -350,7 +349,7 @@ define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg,
 ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
+; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0
 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
 ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
 ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
index e9d39b3fe4455..1fbdb5d512da3 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
@@ -26,17 +26,17 @@ define amdgpu_kernel void @test_inst_offset_kernel() {
 ; FLATSCR:       ; %bb.0: ; %entry
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
-; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_lo offset:8 glc
+; FLATSCR-NEXT:    s_mov_b32 s1, 0
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s1 offset:8 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_movk_i32 s0, 0xffc
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
 ; FLATSCR-NEXT:    ;;#ASMSTART
 ; FLATSCR-NEXT:    ;;#ASMEND
 ; FLATSCR-NEXT:    scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_store_dword off, v0, vcc_hi offset:8
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 offset:8
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_endpgm
 entry:
@@ -79,17 +79,17 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() {
 ; FLATSCR:       ; %bb.0: ; %entry
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
-; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_lo offset:8 glc
+; FLATSCR-NEXT:    s_mov_b32 s1, 0
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s1 offset:8 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_movk_i32 s0, 0x1000
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
 ; FLATSCR-NEXT:    ;;#ASMSTART
 ; FLATSCR-NEXT:    ;;#ASMEND
 ; FLATSCR-NEXT:    scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
+; FLATSCR-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_store_dword off, v0, vcc_hi offset:8
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 offset:8
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_endpgm
 entry:
@@ -218,10 +218,10 @@ define amdgpu_kernel void @test_sgpr_offset_function_scavenge_fail_kernel() #3 {
 ; FLATSCR:       ; %bb.0: ; %entry
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    s_mov_b32 s9, 0
 ; FLATSCR-NEXT:    ;;#ASMSTART
 ; FLATSCR-NEXT:    ;;#ASMEND
-; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_hi offset:8 glc
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s9 offset:8 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_movk_i32 s8, 0x1004
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s8 ; 4-byte Folded Spill
@@ -301,15 +301,15 @@ define amdgpu_kernel void @test_sgpr_offset_subregs_kernel() {
 ; FLATSCR:       ; %bb.0: ; %entry
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, vcc_lo offset:12 glc
+; FLATSCR-NEXT:    s_mov_b32 s2, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s2 offset:12 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_movk_i32 s0, 0xff8
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    s_mov_b32 s1, 0
 ; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill
 ; FLATSCR-NEXT:    ;;#ASMSTART
 ; FLATSCR-NEXT:    ;;#ASMEND
-; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_hi offset:8 glc
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s1 offset:8 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
@@ -367,15 +367,15 @@ define amdgpu_kernel void @test_inst_offset_subregs_kernel() {
 ; FLATSCR:       ; %bb.0: ; %entry
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; FLATSCR-NEXT:    s_mov_b32 vcc_lo, 0
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, vcc_lo offset:12 glc
+; FLATSCR-NEXT:    s_mov_b32 s2, 0
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s2 offset:12 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_movk_i32 s0, 0xffc
-; FLATSCR-NEXT:    s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT:    s_mov_b32 s1, 0
 ; FLATSCR-NEXT:    scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill
 ; FLATSCR-NEXT:    ;;#ASMSTART
 ; FLATSCR-NEXT:    ;;#ASMEND
-; FLATSCR-NEXT:    scratch_load_dword v0, off, vcc_hi offset:8 glc
+; FLATSCR-NEXT:    scratch_load_dword v0, off, s1 offset:8 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)

diff  --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
index 31aee3f6a5070..8ac9fd0f71b09 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
@@ -68,9 +68,9 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
 ; FLATSCR-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; FLATSCR-NEXT:    s_cbranch_execz .LBB0_2
 ; FLATSCR-NEXT:  ; %bb.1: ; %if.then4.i
-; FLATSCR-NEXT:    s_movk_i32 vcc_lo, 0x4000
+; FLATSCR-NEXT:    s_movk_i32 s1, 0x4000
 ; FLATSCR-NEXT:    s_mov_b32 s0, 0x41c64e6d
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, vcc_lo offset:4
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s1 offset:4
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    v_add_nc_u32_e32 v0, v1, v0
 ; FLATSCR-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039
@@ -95,9 +95,9 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
 ; MUBUF11-NEXT:    v_cmpx_ne_u32_e32 0, v0
 ; MUBUF11-NEXT:    s_cbranch_execz .LBB0_2
 ; MUBUF11-NEXT:  ; %bb.1: ; %if.then4.i
-; MUBUF11-NEXT:    s_movk_i32 vcc_lo, 0x4000
+; MUBUF11-NEXT:    s_movk_i32 s1, 0x4000
 ; MUBUF11-NEXT:    s_mov_b32 s0, 0x41c64e6d
-; MUBUF11-NEXT:    scratch_load_b64 v[0:1], off, vcc_lo offset:4
+; MUBUF11-NEXT:    scratch_load_b64 v[0:1], off, s1 offset:4
 ; MUBUF11-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF11-NEXT:    v_add_nc_u32_e32 v2, v1, v0
 ; MUBUF11-NEXT:    v_mad_u64_u32 v[0:1], null, v2, s0, 0x3039
@@ -123,9 +123,9 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
 ; FLATSCR11-NEXT:    v_cmpx_ne_u32_e32 0, v0
 ; FLATSCR11-NEXT:    s_cbranch_execz .LBB0_2
 ; FLATSCR11-NEXT:  ; %bb.1: ; %if.then4.i
-; FLATSCR11-NEXT:    s_movk_i32 vcc_lo, 0x4000
+; FLATSCR11-NEXT:    s_movk_i32 s1, 0x4000
 ; FLATSCR11-NEXT:    s_mov_b32 s0, 0x41c64e6d
-; FLATSCR11-NEXT:    scratch_load_b64 v[0:1], off, vcc_lo offset:4
+; FLATSCR11-NEXT:    scratch_load_b64 v[0:1], off, s1 offset:4
 ; FLATSCR11-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR11-NEXT:    v_add_nc_u32_e32 v2, v1, v0
 ; FLATSCR11-NEXT:    v_mad_u64_u32 v[0:1], null, v2, s0, 0x3039

diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
index b036d4c9ec139..93b98df2f7dba 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
@@ -1085,10 +1085,10 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   $vcc_hi = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
-  ; GFX9-FLATSCR-NEXT:   S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   $vcc_hi = S_BITSET0_B32 0, $vcc_hi
-  ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec
+  ; GFX9-FLATSCR-NEXT:   $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+  ; GFX9-FLATSCR-NEXT:   S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc
+  ; GFX9-FLATSCR-NEXT:   $sgpr4 = S_BITSET0_B32 0, $sgpr4
+  ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
@@ -1104,10 +1104,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   $vcc_lo = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
-  ; GFX10-FLATSCR-NEXT:   S_BITCMP1_B32 $vcc_lo, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   $vcc_lo = S_BITSET0_B32 0, $vcc_lo
-  ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec
+  ; GFX10-FLATSCR-NEXT:   $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+  ; GFX10-FLATSCR-NEXT:   S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc
+  ; GFX10-FLATSCR-NEXT:   $sgpr4 = S_BITSET0_B32 0, $sgpr4
+  ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
   ; GFX10-FLATSCR-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
@@ -1191,10 +1191,10 @@ body:             |
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vcc_hi = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
-  ; GFX9-FLATSCR-NEXT:   S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   $vcc_hi = S_BITSET0_B32 0, $vcc_hi
-  ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec
+  ; GFX9-FLATSCR-NEXT:   $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+  ; GFX9-FLATSCR-NEXT:   S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc
+  ; GFX9-FLATSCR-NEXT:   $sgpr4 = S_BITSET0_B32 0, $sgpr4
+  ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
@@ -1214,10 +1214,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vcc_lo = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
-  ; GFX10-FLATSCR-NEXT:   S_BITCMP1_B32 $vcc_lo, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   $vcc_lo = S_BITSET0_B32 0, $vcc_lo
-  ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec
+  ; GFX10-FLATSCR-NEXT:   $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+  ; GFX10-FLATSCR-NEXT:   S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc
+  ; GFX10-FLATSCR-NEXT:   $sgpr4 = S_BITSET0_B32 0, $sgpr4
+  ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
   ; GFX10-FLATSCR-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
@@ -1303,10 +1303,10 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   $vcc_hi = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
-  ; GFX9-FLATSCR-NEXT:   S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   $vcc_hi = S_BITSET0_B32 0, $vcc_hi
-  ; GFX9-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec
+  ; GFX9-FLATSCR-NEXT:   $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+  ; GFX9-FLATSCR-NEXT:   S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc
+  ; GFX9-FLATSCR-NEXT:   $sgpr4 = S_BITSET0_B32 0, $sgpr4
+  ; GFX9-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -1323,10 +1323,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   $vcc_lo = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
-  ; GFX10-FLATSCR-NEXT:   S_BITCMP1_B32 $vcc_lo, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   $vcc_lo = S_BITSET0_B32 0, $vcc_lo
-  ; GFX10-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec
+  ; GFX10-FLATSCR-NEXT:   $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
+  ; GFX10-FLATSCR-NEXT:   S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc
+  ; GFX10-FLATSCR-NEXT:   $sgpr4 = S_BITSET0_B32 0, $sgpr4
+  ; GFX10-FLATSCR-NEXT:   $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:


        


More information about the llvm-commits mailing list