[llvm] r295891 - AMDGPU: Don't add emergency stack slot if all spills are SGPR->VGPR

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 22 14:23:32 PST 2017


Author: arsenm
Date: Wed Feb 22 16:23:32 2017
New Revision: 295891

URL: http://llvm.org/viewvc/llvm-project?rev=295891&view=rev
Log:
AMDGPU: Don't add emergency stack slot if all spills are SGPR->VGPR

This should avoid reporting that any stack needs to be allocated in the
case where no stack is truly used. An unused stack slot is still
left around in other cases where there are real stack objects
but no spilling occurs.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=295891&r1=295890&r2=295891&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Wed Feb 22 16:23:32 2017
@@ -383,6 +383,16 @@ void SIFrameLowering::emitEpilogue(Machi
 
 }
 
+static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
+  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
+       I != E; ++I) {
+    if (!MFI.isDeadObjectIndex(I))
+      return false;
+  }
+
+  return true;
+}
+
 void SIFrameLowering::processFunctionBeforeFrameFinalized(
   MachineFunction &MF,
   RegScavenger *RS) const {
@@ -391,8 +401,51 @@ void SIFrameLowering::processFunctionBef
   if (!MFI.hasStackObjects())
     return;
 
-  bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
-  if (MayNeedScavengingEmergencySlot) {
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  bool AllSGPRSpilledToVGPRs = false;
+
+  if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
+    AllSGPRSpilledToVGPRs = true;
+
+    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+    // are spilled to VGPRs, in which case we can eliminate the stack usage.
+    //
+    // XXX - This operates under the assumption that only other SGPR spills are
+    // users of the frame index. I'm not 100% sure this is correct. The
+    // StackColoring pass has a comment saying a future improvement would be
+    // to merge allocas with spill slots, but for now, according to
+    // MachineFrameInfo, isSpillSlot can't alias any other object.
+    for (MachineBasicBlock &MBB : MF) {
+      MachineBasicBlock::iterator Next;
+      for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+        MachineInstr &MI = *I;
+        Next = std::next(I);
+
+        if (TII->isSGPRSpill(MI)) {
+          int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+          if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+            bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
+            (void)Spilled;
+            assert(Spilled && "failed to spill SGPR to VGPR when allocated");
+          } else
+            AllSGPRSpilledToVGPRs = false;
+        }
+      }
+    }
+
+    FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
+  }
+
+  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
+  // but currently hasNonSpillStackObjects is set only from source
+  // allocas. Stack temps produced from legalization are not counted currently.
+  if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
+      !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
+    assert(RS && "RegScavenger required if spilling");
+
     // We force this to be at offset 0 so no user object ever has 0 as an
     // address, so we may use 0 as an invalid pointer value. This is because
     // LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
@@ -410,40 +463,6 @@ void SIFrameLowering::processFunctionBef
       AMDGPU::SGPR_32RegClass.getSize(), 0, false);
     RS->addScavengingFrameIndex(ScavengeFI);
   }
-
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
-  const SIInstrInfo *TII = ST.getInstrInfo();
-  const SIRegisterInfo &TRI = TII->getRegisterInfo();
-  if (!TRI.spillSGPRToVGPR())
-    return;
-
-  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
-  if (!FuncInfo->hasSpilledSGPRs())
-    return;
-
-  // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
-  // are spilled to VGPRs, in which case we can eliminate the stack usage.
-  //
-  // XXX - This operates under the assumption that only other SGPR spills are
-  // users of the frame index. I'm not 100% sure this is correct. The
-  // StackColoring pass has a comment saying a future improvement would be to
-  // merging of allocas with spill slots, but for now according to
-  // MachineFrameInfo isSpillSlot can't alias any other object.
-  for (MachineBasicBlock &MBB : MF) {
-    MachineBasicBlock::iterator Next;
-    for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
-      MachineInstr &MI = *I;
-      Next = std::next(I);
-
-      if (TII->isSGPRSpill(MI)) {
-        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
-        if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI))
-          TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
-      }
-    }
-  }
-
-  FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
 }
 
 void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,

Modified: llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll?rev=295891&r1=295890&r2=295891&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll Wed Feb 22 16:23:32 2017
@@ -18,13 +18,12 @@
 ; GCN: s_mov_b32 m0
 
 ; Make sure scratch space isn't being used for SGPR->VGPR spills
-; FIXME: Seem to be leaving behind unused emergency slot.
 
 ; Writing to M0 from an SMRD instruction will hang the GPU.
 ; GCN-NOT: s_buffer_load_dword m0
 ; GCN: s_endpgm
 
-; TOVGPR: ScratchSize: 4{{$}}
+; TOVGPR: ScratchSize: 0{{$}}
 define amdgpu_ps void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
 main_body:
   %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
@@ -768,7 +767,7 @@ ENDIF66:
 
 ; GCN-LABEL: {{^}}main1:
 ; GCN: s_endpgm
-; TOVGPR: ScratchSize: 4{{$}}
+; TOVGPR: ScratchSize: 0{{$}}
 define amdgpu_ps void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
 main_body:
   %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0




More information about the llvm-commits mailing list