[PATCH] D37411: AMDGPU: Fix not accounting for tail call resource usage
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 5 10:45:20 PDT 2017
arsenm updated this revision to Diff 113883.
arsenm added a comment.
Add comment
https://reviews.llvm.org/D37411
Files:
lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
test/CodeGen/AMDGPU/sibling-call.ll
Index: test/CodeGen/AMDGPU/sibling-call.ll
===================================================================
--- test/CodeGen/AMDGPU/sibling-call.ll
+++ test/CodeGen/AMDGPU/sibling-call.ll
@@ -11,6 +11,22 @@
ret i32 %add0
}
+; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object:
+; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN: s_mov_b32 s5, s32
+; GCN: v_add_i32_e32 v0, vcc, v1, v
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24
+; GCN: s_waitcnt vmcnt(0)
+; GCN: s_setpc_b64
+; GCN: ; ScratchSize: 68
+define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
+ %alloca = alloca [16 x i32], align 4
+ %gep = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 5
+ store volatile i32 9, i32* %gep
+ %add0 = add i32 %arg0, %arg1
+ ret i32 %add0
+}
+
; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32:
define fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
entry:
@@ -22,6 +38,7 @@
; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:24
; GCN: s_setpc_b64
+; GCN: ; ScratchSize: 68
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 {
entry:
%alloca = alloca [16 x i32], align 4
@@ -31,6 +48,20 @@
ret i32 %ret
}
+; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_callee_stack_object:
+; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
+; GCN: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:24
+; GCN: s_setpc_b64
+; GCN: ; ScratchSize: 136
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 {
+entry:
+ %alloca = alloca [16 x i32], align 4
+ %gep = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 5
+ store volatile i32 9, i32* %gep
+ %ret = tail call fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b)
+ ret i32 %ret
+}
+
; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_unused_result:
define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
entry:
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -500,7 +500,8 @@
// If there are no calls, MachineRegisterInfo can tell us the used register
// count easily.
- if (!FrameInfo.hasCalls()) {
+ // A tail call isn't considered a call for MachineFrameInfo's purposes.
+ if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
if (MRI.isPhysRegUsed(Reg)) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D37411.113883.patch
Type: text/x-patch
Size: 2725 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170905/f6e4280e/attachment.bin>
More information about the llvm-commits mailing list