[llvm] r312561 - AMDGPU: Fix not accounting for tail call resource usage

Tue Sep 5 11:36:36 PDT 2017

Author: arsenm
Date: Tue Sep  5 11:36:36 2017
New Revision: 312561

URL: http://llvm.org/viewvc/llvm-project?rev=312561&view=rev
Log:
AMDGPU: Fix not accounting for tail call resource usage

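If the only call in a function is a tail call, MachineFrameInfo
does not consider the function to have a call, since a tail call
is treated as a type of return. As a result, the resource usage of
the tail-called function was not accounted for.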

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=312561&r1=312560&r2=312561&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Tue Sep  5 11:36:36 2017
@@ -500,7 +500,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
 
   // If there are no calls, MachineRegisterInfo can tell us the used register
   // count easily.
-  if (!FrameInfo.hasCalls()) {
+  // A tail call isn't considered a call for MachineFrameInfo's purposes.
+  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
     MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
     for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
       if (MRI.isPhysRegUsed(Reg)) {

Modified: llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll?rev=312561&r1=312560&r2=312561&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll Tue Sep  5 11:36:36 2017
@@ -11,6 +11,22 @@ define fastcc i32 @i32_fastcc_i32_i32(i3
   ret i32 %add0
 }
 
+; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object:
+; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN: s_mov_b32 s5, s32
+; GCN: v_add_i32_e32 v0, vcc, v1, v
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24
+; GCN: s_waitcnt vmcnt(0)
+; GCN: s_setpc_b64
+; GCN: ; ScratchSize: 68
+define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
+  %alloca = alloca [16 x i32], align 4
+  %gep = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 5
+  store volatile i32 9, i32* %gep
+  %add0 = add i32 %arg0, %arg1
+  ret i32 %add0
+}
+
 ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32:
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
 entry:
@@ -22,6 +38,7 @@ entry:
 ; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
 ; GCN: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:24
 ; GCN: s_setpc_b64
+; GCN: ; ScratchSize: 68
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 {
 entry:
   %alloca = alloca [16 x i32], align 4
@@ -31,6 +48,20 @@ entry:
   ret i32 %ret
 }
 
+; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_callee_stack_object:
+; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
+; GCN: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:24
+; GCN: s_setpc_b64
+; GCN: ; ScratchSize: 136
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 {
+entry:
+  %alloca = alloca [16 x i32], align 4
+  %gep = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 5
+  store volatile i32 9, i32* %gep
+  %ret = tail call fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b)
+  ret i32 %ret
+}
+
 ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_unused_result:
 define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
 entry: