[llvm] aff94ec - [AMDGPU] Remove the dead spill slots while spilling FP/BP to memory

Sat Sep 5 18:35:00 PDT 2020

Author: vnalamot
Date: 2020-09-06T07:04:25+05:30
New Revision: aff94ec0f4ded3eff2d5c0431f5fec9e41731b05

URL: https://github.com/llvm/llvm-project/commit/aff94ec0f4ded3eff2d5c0431f5fec9e41731b05
DIFF: https://github.com/llvm/llvm-project/commit/aff94ec0f4ded3eff2d5c0431f5fec9e41731b05.diff

LOG: [AMDGPU] Remove the dead spill slots while spilling FP/BP to memory

During the PEI pass, the dead TargetStackID::SGPRSpill spill slot was not
being removed when the FP/BP had to be spilled to memory. Remove the dead
frame index before creating the stack object used for the memory spill.

Fixes: SWDEV-250393

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D87032

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
    llvm/test/CodeGen/AMDGPU/stack-realign.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 9b795b22f523..c947995fd3ee 100644

--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -112,15 +112,19 @@ static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
       // 3: There's no free lane to spill, and no free register to save FP/BP,
       // so we're forced to spill another VGPR to use for the spill.
       FrameIndex = NewFI;
+
+      LLVM_DEBUG(
+          auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
+          dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
+                 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
     } else {
+      // Remove dead <NewFI> index
+      MF.getFrameInfo().RemoveStackObject(NewFI);
       // 4: If all else fails, spill the FP/BP to memory.
       FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
+      LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
+                        << (IsFP ? "FP" : "BP") << '\n');
     }
-
-    LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
-               dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
-                      << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
-                      << '\n';);
   } else {
     LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
                       << printReg(TempSGPR, TRI) << '\n');

diff  --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index 3016d99fc8c1..e6980b895f59 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -463,15 +463,16 @@ define void @ipra_call_with_stack() #0 {
 ; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory:
 ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
 ; GCN: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33
-; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:[[OFF:[0-9]+]]
+; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:4
 ; GCN: s_mov_b64 exec, [[COPY_EXEC1]]
 ; GCN: s_mov_b32 s33, s32
 ; GCN: s_or_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:[[OFF]]
+; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:4
 ; GCN: s_waitcnt vmcnt(0)
 ; GCN: v_readfirstlane_b32 s33, [[TMP_VGPR2]]
 ; GCN: s_mov_b64 exec, [[COPY_EXEC2]]
 ; GCN: s_setpc_b64
+; GCN: ScratchSize: 8
 define void @callee_need_to_spill_fp_to_memory() #3 {
   call void asm sideeffect "; clobber nonpreserved SGPRs",
     "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
@@ -529,8 +530,8 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
 ; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset
 ; GCN: s_or_saveexec_b64 s[4:5], -1
 ; GCN: v_mov_b32_e32 v0, s33
-; GCN-NOT: v_mov_b32_e32 v0, 0x100c
-; GCN-NEXT: v_mov_b32_e32 v1, 0x100c
+; GCN-NOT: v_mov_b32_e32 v0, 0x1008
+; GCN-NEXT: v_mov_b32_e32 v1, 0x1008
 ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
 define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval align 4 %arg) #3 {
   %alloca = alloca i32, addrspace(5)

diff  --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
index e8e3518aed1c..3219c75c43a8 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
@@ -291,12 +291,12 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i
 ; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset
 ; GCN: s_or_saveexec_b64 s[4:5], -1
 ; GCN: v_mov_b32_e32 v0, s33
-; GCN-NOT: v_mov_b32_e32 v0, 0x1088
-; GCN-NEXT: v_mov_b32_e32 v1, 0x1088
+; GCN-NOT: v_mov_b32_e32 v0, 0x1084
+; GCN-NEXT: v_mov_b32_e32 v1, 0x1084
 ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
 ; GCN: v_mov_b32_e32 v0, s34
-; GCN-NOT: v_mov_b32_e32 v0, 0x1090
-; GCN-NEXT: v_mov_b32_e32 v1, 0x1090
+; GCN-NOT: v_mov_b32_e32 v0, 0x1088
+; GCN-NEXT: v_mov_b32_e32 v1, 0x1088
 ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
   %local_val = alloca i32, align 128, addrspace(5)
   store volatile i32 %b, i32 addrspace(5)* %local_val, align 128