[llvm] e353195 - [AMDGPU] Fix stack size metadata for functions with direct and indirect calls (#110828)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 2 06:52:55 PDT 2024


Author: Janek van Oirschot
Date: 2024-10-02T14:52:52+01:00
New Revision: e35319524a3f5834ea1f5a7f7d7624a295be8ab7

URL: https://github.com/llvm/llvm-project/commit/e35319524a3f5834ea1f5a7f7d7624a295be8ab7
DIFF: https://github.com/llvm/llvm-project/commit/e35319524a3f5834ea1f5a7f7d7624a295be8ab7.diff

LOG: [AMDGPU] Fix stack size metadata for functions with direct and indirect calls (#110828)

When a function has an external call, it should still use the stack
sizes of its direct (known) calls to calculate its own stack size.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
    llvm/test/CodeGen/AMDGPU/function-resource-usage.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 1de6bf1631a25b..da0397fa20bd1b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -159,8 +159,12 @@ void MCResourceInfo::gatherResourceInfo(
       ArgExprs.push_back(
           MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
 
-    if (!FRI.HasIndirectCall) {
-      for (const Function *Callee : FRI.Callees) {
+    SmallPtrSet<const Function *, 8> Seen;
+    Seen.insert(&MF.getFunction());
+    for (const Function *Callee : FRI.Callees) {
+      if (!Seen.insert(Callee).second)
+        continue;
+      if (!Callee->isDeclaration()) {
         MCSymbol *calleeValSym =
             getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
         ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));

diff  --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index 9e3264eb9c07f2..d3a6b4e01ebfb8 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -358,7 +358,7 @@ declare void @external() #0
 ; GCN:	.set multi_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr)
 ; GCN:	.set multi_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr)
 ; GCN:	.set multi_call_with_external.numbered_sgpr, max(42, amdgpu.max_num_sgpr)
-; GCN:	.set multi_call_with_external.private_seg_size, 0
+; GCN:	.set multi_call_with_external.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
 ; GCN:	.set multi_call_with_external.uses_vcc, 1
 ; GCN:	.set multi_call_with_external.uses_flat_scratch, 1
 ; GCN:	.set multi_call_with_external.has_dyn_sized_stack, 1
@@ -366,7 +366,7 @@ declare void @external() #0
 ; GCN:	.set multi_call_with_external.has_indirect_call, 1
 ; GCN: TotalNumSgprs: multi_call_with_external.numbered_sgpr+6
 ; GCN: NumVgprs: multi_call_with_external.num_vgpr
-; GCN: ScratchSize: 0
+; GCN: ScratchSize: 2052
 define amdgpu_kernel void @multi_call_with_external() #0 {
   call void @use_stack0()
   call void @use_stack1()
@@ -374,6 +374,29 @@ define amdgpu_kernel void @multi_call_with_external() #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}multi_call_with_external_and_duplicates:
+; GCN:	.set multi_call_with_external_and_duplicates.num_vgpr, max(41, amdgpu.max_num_vgpr)
+; GCN:	.set multi_call_with_external_and_duplicates.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN:	.set multi_call_with_external_and_duplicates.numbered_sgpr, max(44, amdgpu.max_num_sgpr)
+; GCN:	.set multi_call_with_external_and_duplicates.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
+; GCN:	.set multi_call_with_external_and_duplicates.uses_vcc, 1
+; GCN:	.set multi_call_with_external_and_duplicates.uses_flat_scratch, 1
+; GCN:	.set multi_call_with_external_and_duplicates.has_dyn_sized_stack, 1
+; GCN:	.set multi_call_with_external_and_duplicates.has_recursion, 0
+; GCN:	.set multi_call_with_external_and_duplicates.has_indirect_call, 1
+; GCN: TotalNumSgprs: multi_call_with_external_and_duplicates.numbered_sgpr+6
+; GCN: NumVgprs: multi_call_with_external_and_duplicates.num_vgpr
+; GCN: ScratchSize: 2052
+define amdgpu_kernel void @multi_call_with_external_and_duplicates() #0 {
+  call void @use_stack0()
+  call void @use_stack0()
+  call void @use_stack1()
+  call void @use_stack1()
+  call void @external()
+  call void @external()
+  ret void
+}
+
 ; GCN-LABEL: {{^}}usage_external:
 ; GCN:	.set usage_external.num_vgpr, max(32, amdgpu.max_num_vgpr)
 ; GCN:	.set usage_external.num_agpr, max(0, amdgpu.max_num_agpr)


        


More information about the llvm-commits mailing list