[llvm] e353195 - [AMDGPU] Fix stack size metadata for functions with direct and indirect calls (#110828)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 2 06:52:55 PDT 2024
Author: Janek van Oirschot
Date: 2024-10-02T14:52:52+01:00
New Revision: e35319524a3f5834ea1f5a7f7d7624a295be8ab7
URL: https://github.com/llvm/llvm-project/commit/e35319524a3f5834ea1f5a7f7d7624a295be8ab7
DIFF: https://github.com/llvm/llvm-project/commit/e35319524a3f5834ea1f5a7f7d7624a295be8ab7.diff
LOG: [AMDGPU] Fix stack size metadata for functions with direct and indirect calls (#110828)
When a function has an external call, it should still use the stack
sizes of direct, known calls to calculate its own stack size.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 1de6bf1631a25b..da0397fa20bd1b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -159,8 +159,12 @@ void MCResourceInfo::gatherResourceInfo(
ArgExprs.push_back(
MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
- if (!FRI.HasIndirectCall) {
- for (const Function *Callee : FRI.Callees) {
+ SmallPtrSet<const Function *, 8> Seen;
+ Seen.insert(&MF.getFunction());
+ for (const Function *Callee : FRI.Callees) {
+ if (!Seen.insert(Callee).second)
+ continue;
+ if (!Callee->isDeclaration()) {
MCSymbol *calleeValSym =
getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index 9e3264eb9c07f2..d3a6b4e01ebfb8 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -358,7 +358,7 @@ declare void @external() #0
; GCN: .set multi_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr)
; GCN: .set multi_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set multi_call_with_external.numbered_sgpr, max(42, amdgpu.max_num_sgpr)
-; GCN: .set multi_call_with_external.private_seg_size, 0
+; GCN: .set multi_call_with_external.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
; GCN: .set multi_call_with_external.uses_vcc, 1
; GCN: .set multi_call_with_external.uses_flat_scratch, 1
; GCN: .set multi_call_with_external.has_dyn_sized_stack, 1
@@ -366,7 +366,7 @@ declare void @external() #0
; GCN: .set multi_call_with_external.has_indirect_call, 1
; GCN: TotalNumSgprs: multi_call_with_external.numbered_sgpr+6
; GCN: NumVgprs: multi_call_with_external.num_vgpr
-; GCN: ScratchSize: 0
+; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_with_external() #0 {
call void @use_stack0()
call void @use_stack1()
@@ -374,6 +374,29 @@ define amdgpu_kernel void @multi_call_with_external() #0 {
ret void
}
+; GCN-LABEL: {{^}}multi_call_with_external_and_duplicates:
+; GCN: .set multi_call_with_external_and_duplicates.num_vgpr, max(41, amdgpu.max_num_vgpr)
+; GCN: .set multi_call_with_external_and_duplicates.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set multi_call_with_external_and_duplicates.numbered_sgpr, max(44, amdgpu.max_num_sgpr)
+; GCN: .set multi_call_with_external_and_duplicates.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
+; GCN: .set multi_call_with_external_and_duplicates.uses_vcc, 1
+; GCN: .set multi_call_with_external_and_duplicates.uses_flat_scratch, 1
+; GCN: .set multi_call_with_external_and_duplicates.has_dyn_sized_stack, 1
+; GCN: .set multi_call_with_external_and_duplicates.has_recursion, 0
+; GCN: .set multi_call_with_external_and_duplicates.has_indirect_call, 1
+; GCN: TotalNumSgprs: multi_call_with_external_and_duplicates.numbered_sgpr+6
+; GCN: NumVgprs: multi_call_with_external_and_duplicates.num_vgpr
+; GCN: ScratchSize: 2052
+define amdgpu_kernel void @multi_call_with_external_and_duplicates() #0 {
+ call void @use_stack0()
+ call void @use_stack0()
+ call void @use_stack1()
+ call void @use_stack1()
+ call void @external()
+ call void @external()
+ ret void
+}
+
; GCN-LABEL: {{^}}usage_external:
; GCN: .set usage_external.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN: .set usage_external.num_agpr, max(0, amdgpu.max_num_agpr)
More information about the llvm-commits
mailing list