[llvm] 3604fdf - [AMDGPU] Do not assume stack size for PAL code object indirect calls

Mon Jun 12 07:15:25 PDT 2023

Author: Baptiste
Date: 2023-06-12T10:14:17-04:00
New Revision: 3604fdf18d351a8627611c343e522912e3486f77

URL: https://github.com/llvm/llvm-project/commit/3604fdf18d351a8627611c343e522912e3486f77
DIFF: https://github.com/llvm/llvm-project/commit/3604fdf18d351a8627611c343e522912e3486f77.diff

LOG: [AMDGPU] Do not assume stack size for PAL code object indirect calls

There is no need to set a big default stack size for PAL code object indirect
calls. The driver knows the max recursion depth, so it can compute a more
accurate value from the minimum scratch size.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D150609

Added: 
    llvm/test/CodeGen/AMDGPU/resource-usage-pal.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index c16d089ae8ee8..804bf503e4f9f 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -104,6 +104,7 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
 
   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
   const TargetMachine &TM = TPC->getTM<TargetMachine>();
+  const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
   bool HasIndirectCall = false;
 
   CallGraph CG = CallGraph(M);
@@ -111,7 +112,8 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
 
   // By default, for code object v5 and later, track only the minimum scratch
   // size
-  if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5) {
+  if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
+      STI.getTargetTriple().getOS() == Triple::AMDPAL) {
     if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
       AssumedStackSizeForDynamicSizeObjects = 0;
     if (!AssumedStackSizeForExternalCall.getNumOccurrences())

diff  --git a/llvm/test/CodeGen/AMDGPU/resource-usage-pal.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-pal.ll
new file mode 100644
index 0000000000000..55b84116b5c3b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/resource-usage-pal.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
+
+; Check that we do not assume any default stack size for PAL code object
+; indirect calls. The driver knows the max recursion depth, so it can compute
+; a more accurate value.
+
+; CHECK: ScratchSize: 0
+; CHECK: scratch_memory_size: 0
+define amdgpu_vs void @test() {
+.entry:
+  %0 = call i64 @llvm.amdgcn.s.getpc()
+  %1 = inttoptr i64 %0 to ptr
+  call amdgpu_gfx void %1()
+  ret void
+}
+
+declare i64 @llvm.amdgcn.s.getpc()