[llvm] [AMDGPU] Fix stack size metadata for functions with direct and indirect calls (PR #110828)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 2 04:38:52 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Janek van Oirschot (JanekvO)
<details>
<summary>Changes</summary>
When a function has an external call, it should still use the stack sizes of its direct, known callees to calculate its own stack size.
---
Full diff: https://github.com/llvm/llvm-project/pull/110828.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp (+6-2)
- (modified) llvm/test/CodeGen/AMDGPU/function-resource-usage.ll (+2-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index f608a9a4f470fa..3cb9825a64ff9b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -157,8 +157,12 @@ void MCResourceInfo::gatherResourceInfo(
ArgExprs.push_back(
MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
- if (!FRI.HasIndirectCall) {
- for (const Function *Callee : FRI.Callees) {
+ SmallPtrSet<const Function *, 8> Seen;
+ Seen.insert(&MF.getFunction());
+ for (const Function *Callee : FRI.Callees) {
+ if (!Seen.insert(Callee).second)
+ continue;
+ if (!Callee->isDeclaration()) {
MCSymbol *calleeValSym =
getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index 9e3264eb9c07f2..c38baf79c3781d 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -358,7 +358,7 @@ declare void @external() #0
; GCN: .set multi_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr)
; GCN: .set multi_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set multi_call_with_external.numbered_sgpr, max(42, amdgpu.max_num_sgpr)
-; GCN: .set multi_call_with_external.private_seg_size, 0
+; GCN: .set multi_call_with_external.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
; GCN: .set multi_call_with_external.uses_vcc, 1
; GCN: .set multi_call_with_external.uses_flat_scratch, 1
; GCN: .set multi_call_with_external.has_dyn_sized_stack, 1
@@ -366,7 +366,7 @@ declare void @external() #0
; GCN: .set multi_call_with_external.has_indirect_call, 1
; GCN: TotalNumSgprs: multi_call_with_external.numbered_sgpr+6
; GCN: NumVgprs: multi_call_with_external.num_vgpr
-; GCN: ScratchSize: 0
+; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_with_external() #0 {
call void @use_stack0()
call void @use_stack1()
``````````
</details>
https://github.com/llvm/llvm-project/pull/110828
More information about the llvm-commits mailing list