[llvm] 8b898b1 - [AMDGPU] Remove unused tmp register

Sebastian Neubauer via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 2 08:18:14 PST 2021


Author: Sebastian Neubauer
Date: 2021-02-02T17:17:54+01:00
New Revision: 8b898b19a8cb90871fa85a6db924c61b9f22cfe6

URL: https://github.com/llvm/llvm-project/commit/8b898b19a8cb90871fa85a6db924c61b9f22cfe6
DIFF: https://github.com/llvm/llvm-project/commit/8b898b19a8cb90871fa85a6db924c61b9f22cfe6.diff

LOG: [AMDGPU] Remove unused tmp register

The temporary register is only used to compute the frame pointer.
The frame pointer is overwritten and not used in between, so we
can reuse the frame pointer for the computation, saving one register.

Differential Revision: https://reviews.llvm.org/D95865

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
    llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
    llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
    llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
    llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
    llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
    llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
    llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
    llvm/test/CodeGen/AMDGPU/stack-realign.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 246587dff055..70a035eb8af7 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1022,19 +1022,14 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       LiveRegs.addLiveIns(MBB);
     }
 
-    Register ScratchSPReg = findScratchNonCalleeSaveRegister(
-        MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
-    assert(ScratchSPReg && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy &&
-           ScratchSPReg != FuncInfo->SGPRForBPSaveRestoreCopy);
-
-    // s_add_u32 tmp_reg, s32, NumBytes
-    // s_and_b32 s32, tmp_reg, 0b111...0000
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
+    // s_add_u32 s33, s32, NumBytes
+    // s_and_b32 s33, s33, 0b111...0000
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), FramePtrReg)
         .addReg(StackPtrReg)
         .addImm((Alignment - 1) * getScratchScaleFactor(ST))
         .setMIFlag(MachineInstr::FrameSetup);
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
-        .addReg(ScratchSPReg, RegState::Kill)
+        .addReg(FramePtrReg, RegState::Kill)
         .addImm(-Alignment * getScratchScaleFactor(ST))
         .setMIFlag(MachineInstr::FrameSetup);
     FuncInfo->setIsStackRealigned(true);

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll
index 2a2037247375..a8373474be3a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll
@@ -259,9 +259,9 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
 ; GFX9-LABEL: func_dynamic_stackalloc_sgpr_align32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_add_u32 s4, s32, 0x7c0
 ; GFX9-NEXT:    s_mov_b32 s6, s33
-; GFX9-NEXT:    s_and_b32 s33, s4, 0xfffff800
+; GFX9-NEXT:    s_add_u32 s33, s32, 0x7c0
+; GFX9-NEXT:    s_and_b32 s33, s33, 0xfffff800
 ; GFX9-NEXT:    s_add_u32 s32, s32, 0x1000
 ; GFX9-NEXT:    s_getpc_b64 s[4:5]
 ; GFX9-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -287,16 +287,16 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    s_add_u32 s4, s32, 0x3e0
 ; GFX10-NEXT:    s_mov_b32 s6, s33
-; GFX10-NEXT:    s_and_b32 s33, s4, 0xfffffc00
+; GFX10-NEXT:    s_add_u32 s33, s32, 0x3e0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    s_and_b32 s33, s33, 0xfffffc00
 ; GFX10-NEXT:    s_add_u32 s32, s32, 0x800
 ; GFX10-NEXT:    s_getpc_b64 s[4:5]
 ; GFX10-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
 ; GFX10-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
-; GFX10-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
 ; GFX10-NEXT:    s_mov_b32 s33, s6
+; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_load_dword s4, s[4:5], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
index f5caf8f83468..3ab8af27c704 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
@@ -8,10 +8,10 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-LABEL: v_extract_v64i32_varidx:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_add_u32 s4, s32, 0x3fc0
-; GCN-NEXT:    v_add_co_u32_e32 v12, vcc, 64, v0
 ; GCN-NEXT:    s_mov_b32 s6, s33
-; GCN-NEXT:    s_and_b32 s33, s4, 0xffffc000
+; GCN-NEXT:    s_add_u32 s33, s32, 0x3fc0
+; GCN-NEXT:    s_and_b32 s33, s33, 0xffffc000
+; GCN-NEXT:    v_add_co_u32_e32 v12, vcc, 64, v0
 ; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
@@ -330,10 +330,10 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-LABEL: v_extract_v128i16_varidx:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_add_u32 s4, s32, 0x3fc0
-; GCN-NEXT:    v_add_co_u32_e32 v12, vcc, 64, v0
 ; GCN-NEXT:    s_mov_b32 s6, s33
-; GCN-NEXT:    s_and_b32 s33, s4, 0xffffc000
+; GCN-NEXT:    s_add_u32 s33, s32, 0x3fc0
+; GCN-NEXT:    s_and_b32 s33, s33, 0xffffc000
+; GCN-NEXT:    v_add_co_u32_e32 v12, vcc, 64, v0
 ; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
@@ -657,10 +657,10 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-LABEL: v_extract_v32i64_varidx:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_add_u32 s4, s32, 0x3fc0
-; GCN-NEXT:    v_add_co_u32_e32 v3, vcc, 64, v0
 ; GCN-NEXT:    s_mov_b32 s6, s33
-; GCN-NEXT:    s_and_b32 s33, s4, 0xffffc000
+; GCN-NEXT:    s_add_u32 s33, s32, 0x3fc0
+; GCN-NEXT:    s_and_b32 s33, s33, 0xffffc000
+; GCN-NEXT:    v_add_co_u32_e32 v3, vcc, 64, v0
 ; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
index 1fb9efe49fcc..3d2731b12869 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
@@ -231,9 +231,9 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
 ; GCN-LABEL: func_non_entry_block_static_alloca_align64:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_add_u32 s4, s32, 0xfc0
 ; GCN-NEXT:    s_mov_b32 s8, s33
-; GCN-NEXT:    s_and_b32 s33, s4, 0xfffff000
+; GCN-NEXT:    s_add_u32 s33, s32, 0xfc0
+; GCN-NEXT:    s_and_b32 s33, s33, 0xfffff000
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GCN-NEXT:    s_add_u32 s32, s32, 0x2000
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc

diff  --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index 115826bda90b..d3bb04f673ff 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -335,12 +335,12 @@ define void @no_new_vgpr_for_fp_csr() #1 {
 
 ; GCN-LABEL: {{^}}realign_stack_no_fp_elim:
 ; GCN: s_waitcnt
-; MUBUF-NEXT:   s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x7ffc0
-; FLATSCR-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x1fff
 ; MUBUF-NEXT:   s_mov_b32 [[FP_COPY:s4]], s33
 ; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
-; MUBUF-NEXT:   s_and_b32 s33, [[SCRATCH]], 0xfff80000
-; FLATSCR-NEXT: s_and_b32 s33, [[SCRATCH]], 0xffffe000
+; MUBUF-NEXT:   s_add_u32 s33, s32, 0x7ffc0
+; FLATSCR-NEXT: s_add_u32 s33, s32, 0x1fff
+; MUBUF-NEXT:   s_and_b32 s33, s33, 0xfff80000
+; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
 ; MUBUF-NEXT:   s_add_u32 s32, s32, 0x100000
 ; FLATSCR-NEXT: s_add_u32 s32, s32, 0x4000
 ; GCN-NEXT:     v_mov_b32_e32 [[ZERO:v[0-9]+]], 0

diff  --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 3052e73479f4..fae05d1403eb 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -110,9 +110,9 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
 ; MUBUF-LABEL: func_local_stack_offset_uses_sp:
 ; MUBUF:       ; %bb.0: ; %entry
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT:    s_add_u32 s4, s32, 0x7ffc0
 ; MUBUF-NEXT:    s_mov_b32 s5, s33
-; MUBUF-NEXT:    s_and_b32 s33, s4, 0xfff80000
+; MUBUF-NEXT:    s_add_u32 s33, s32, 0x7ffc0
+; MUBUF-NEXT:    s_and_b32 s33, s33, 0xfff80000
 ; MUBUF-NEXT:    v_lshrrev_b32_e64 v3, 6, s33
 ; MUBUF-NEXT:    v_add_u32_e32 v3, 0x1000, v3
 ; MUBUF-NEXT:    v_mov_b32_e32 v4, 0
@@ -152,9 +152,9 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) {
 ; FLATSCR-LABEL: func_local_stack_offset_uses_sp:
 ; FLATSCR:       ; %bb.0: ; %entry
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR-NEXT:    s_add_u32 s0, s32, 0x1fff
 ; FLATSCR-NEXT:    s_mov_b32 s2, s33
-; FLATSCR-NEXT:    s_and_b32 s33, s0, 0xffffe000
+; FLATSCR-NEXT:    s_add_u32 s33, s32, 0x1fff
+; FLATSCR-NEXT:    s_and_b32 s33, s33, 0xffffe000
 ; FLATSCR-NEXT:    v_mov_b32_e32 v2, 0
 ; FLATSCR-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR-NEXT:    s_add_u32 s32, s32, 0x6000

diff  --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
index 8eb8f9162542..d3cf255309ef 100644
--- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
@@ -320,9 +320,9 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
 ; MUBUF-LABEL: func_non_entry_block_static_alloca_align64:
 ; MUBUF:       ; %bb.0: ; %entry
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT:    s_add_u32 s4, s32, 0xfc0
 ; MUBUF-NEXT:    s_mov_b32 s7, s33
-; MUBUF-NEXT:    s_and_b32 s33, s4, 0xfffff000
+; MUBUF-NEXT:    s_add_u32 s33, s32, 0xfc0
+; MUBUF-NEXT:    s_and_b32 s33, s33, 0xfffff000
 ; MUBUF-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; MUBUF-NEXT:    s_add_u32 s32, s32, 0x2000
 ; MUBUF-NEXT:    s_and_saveexec_b64 s[4:5], vcc
@@ -354,9 +354,9 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
 ; FLATSCR-LABEL: func_non_entry_block_static_alloca_align64:
 ; FLATSCR:       ; %bb.0: ; %entry
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR-NEXT:    s_add_u32 s0, s32, 63
 ; FLATSCR-NEXT:    s_mov_b32 s3, s33
-; FLATSCR-NEXT:    s_and_b32 s33, s0, 0xffffffc0
+; FLATSCR-NEXT:    s_add_u32 s33, s32, 63
+; FLATSCR-NEXT:    s_andn2_b32 s33, s33, 63
 ; FLATSCR-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; FLATSCR-NEXT:    s_add_u32 s32, s32, 0x80
 ; FLATSCR-NEXT:    s_and_saveexec_b64 s[0:1], vcc

diff  --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
index 7ad421525c6c..aebbb1add91a 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
@@ -29,8 +29,8 @@ body:             |
     ; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
     ; CHECK: liveins: $sgpr27, $vgpr1
     ; CHECK: $sgpr27 = frame-setup COPY $sgpr33
-    ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
-    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
     ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
     ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
     ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
@@ -73,8 +73,8 @@ body:             |
     ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr
     ; CHECK: liveins: $sgpr27, $vgpr1
     ; CHECK: $sgpr27 = frame-setup COPY $sgpr33
-    ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
-    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
     ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
     ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
     ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
@@ -115,8 +115,8 @@ body:             |
     ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64
     ; CHECK: liveins: $sgpr27, $vgpr1
     ; CHECK: $sgpr27 = frame-setup COPY $sgpr33
-    ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
-    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
     ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
     ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
     ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
@@ -156,8 +156,8 @@ body:             |
     ; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc
     ; CHECK: liveins: $sgpr27, $vgpr1
     ; CHECK: $sgpr27 = frame-setup COPY $sgpr33
-    ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
-    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
     ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
     ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31
     ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec

diff  --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
index 990428d0809a..0d46ec6dfa19 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
@@ -25,8 +25,8 @@ body:             |
     ; MUBUF-LABEL: name: scavenge_sgpr_pei_no_sgprs
     ; MUBUF: liveins: $sgpr27, $vgpr1
     ; MUBUF: $sgpr27 = frame-setup COPY $sgpr33
-    ; MUBUF: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
-    ; MUBUF: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+    ; MUBUF: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
+    ; MUBUF: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
     ; MUBUF: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
     ; MUBUF: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
     ; MUBUF: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
@@ -39,8 +39,8 @@ body:             |
     ; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs
     ; FLATSCR: liveins: $sgpr27, $vgpr1
     ; FLATSCR: $sgpr27 = frame-setup COPY $sgpr33
-    ; FLATSCR: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
-    ; FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294959104, implicit-def $scc
+    ; FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
+    ; FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def $scc
     ; FLATSCR: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 24576, implicit-def $scc
     ; FLATSCR: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
     ; FLATSCR: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec

diff  --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
index cf18c235aa87..6ef8a7bddd89 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
@@ -24,8 +24,8 @@ body:             |
     ; CHECK-LABEL: name: scavenge_sgpr_pei
     ; CHECK: liveins: $sgpr27, $vgpr1
     ; CHECK: $sgpr27 = frame-setup COPY $sgpr33
-    ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 262080, implicit-def $scc
-    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294705152, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 262080, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def $scc
     ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 524288, implicit-def $scc
     ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
     ; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec

diff  --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
index 130d6283bfd8..18c1d3c28359 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
@@ -26,8 +26,8 @@ body:             |
     ; GFX8-LABEL: name: pei_scavenge_vgpr_spill
     ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
     ; GFX8: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
-    ; GFX8: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
-    ; GFX8: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+    ; GFX8: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
+    ; GFX8: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
     ; GFX8: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
     ; GFX8: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
     ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
@@ -42,8 +42,8 @@ body:             |
     ; GFX9-LABEL: name: pei_scavenge_vgpr_spill
     ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
     ; GFX9: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
-    ; GFX9: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
-    ; GFX9: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+    ; GFX9: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
+    ; GFX9: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
     ; GFX9: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
     ; GFX9: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
     ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
@@ -57,8 +57,8 @@ body:             |
     ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
     ; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
     ; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
-    ; GFX9-FLATSCR: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
-    ; GFX9-FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294959104, implicit-def $scc
+    ; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
+    ; GFX9-FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def $scc
     ; GFX9-FLATSCR: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 24576, implicit-def $scc
     ; GFX9-FLATSCR: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
     ; GFX9-FLATSCR: $vcc_hi = S_ADD_U32 $sgpr33, 8192, implicit-def $scc

diff  --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
index a5adbe4b7ef3..33e32c36e8e8 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
@@ -124,9 +124,9 @@ define amdgpu_kernel void @kernel_call_align4_from_5() {
 }
 
 ; GCN-LABEL: {{^}}default_realign_align128:
-; GCN: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0
-; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
-; GCN-NEXT: s_and_b32 s33, [[TMP]], 0xffffe000
+; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
+; GCN-NEXT: s_add_u32 s33, s32, 0x1fc0
+; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000
 ; GCN-NEXT: s_add_u32 s32, s32, 0x4000
 ; GCN-NOT: s33
 ; GCN: buffer_store_dword v0, off, s[0:3], s33{{$}}
@@ -193,11 +193,11 @@ define i32 @needs_align1024_stack_args_used_inside_loop(%struct.Data addrspace(5
 ; The BP value will get saved/restored in an SGPR at the prolgoue/epilogue.
 
 ; GCN-LABEL: needs_align1024_stack_args_used_inside_loop:
-; GCN: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
+; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
+; GCN-NEXT: s_add_u32 s33, s32, 0xffc0
+; GCN-NEXT: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
 ; GCN-NEXT: s_mov_b32 s34, s32
-; GCN-NEXT: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
-; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
-; GCN-NEXT: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000
+; GCN-NEXT: s_and_b32 s33, s33, 0xffff0000
 ; GCN-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0
 ; GCN-NEXT: v_lshrrev_b32_e64 [[VGPR_REG:v[0-9]+]], 6, s34
 ; GCN: s_add_u32 s32, s32, 0x30000


        


More information about the llvm-commits mailing list