[llvm] [AMDGPU] Fix stack size metadata for functions with direct and indirect calls (PR #110828)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 2 04:38:52 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Janek van Oirschot (JanekvO)

<details>
<summary>Changes</summary>

When a function has an external call, it should still use the stack sizes of its direct, known callees to calculate its own stack size.

---
Full diff: https://github.com/llvm/llvm-project/pull/110828.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp (+6-2) 
- (modified) llvm/test/CodeGen/AMDGPU/function-resource-usage.ll (+2-2) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index f608a9a4f470fa..3cb9825a64ff9b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -157,8 +157,12 @@ void MCResourceInfo::gatherResourceInfo(
       ArgExprs.push_back(
           MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
 
-    if (!FRI.HasIndirectCall) {
-      for (const Function *Callee : FRI.Callees) {
+    SmallPtrSet<const Function *, 8> Seen;
+    Seen.insert(&MF.getFunction());
+    for (const Function *Callee : FRI.Callees) {
+      if (!Seen.insert(Callee).second)
+        continue;
+      if (!Callee->isDeclaration()) {
         MCSymbol *calleeValSym =
             getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
         ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index 9e3264eb9c07f2..c38baf79c3781d 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -358,7 +358,7 @@ declare void @external() #0
 ; GCN:	.set multi_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr)
 ; GCN:	.set multi_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr)
 ; GCN:	.set multi_call_with_external.numbered_sgpr, max(42, amdgpu.max_num_sgpr)
-; GCN:	.set multi_call_with_external.private_seg_size, 0
+; GCN:	.set multi_call_with_external.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
 ; GCN:	.set multi_call_with_external.uses_vcc, 1
 ; GCN:	.set multi_call_with_external.uses_flat_scratch, 1
 ; GCN:	.set multi_call_with_external.has_dyn_sized_stack, 1
@@ -366,7 +366,7 @@ declare void @external() #0
 ; GCN:	.set multi_call_with_external.has_indirect_call, 1
 ; GCN: TotalNumSgprs: multi_call_with_external.numbered_sgpr+6
 ; GCN: NumVgprs: multi_call_with_external.num_vgpr
-; GCN: ScratchSize: 0
+; GCN: ScratchSize: 2052
 define amdgpu_kernel void @multi_call_with_external() #0 {
   call void @use_stack0()
   call void @use_stack1()

``````````

</details>


https://github.com/llvm/llvm-project/pull/110828


More information about the llvm-commits mailing list