[llvm] 8dfe60c - AMDGPU: Set scratch_en if there is dynamic stack but no fixed stack

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 4 17:51:23 PST 2023


Author: Matt Arsenault
Date: 2023-01-04T20:51:18-05:00
New Revision: 8dfe60c35670a2d6d3017528de95300d797231c0

URL: https://github.com/llvm/llvm-project/commit/8dfe60c35670a2d6d3017528de95300d797231c0
DIFF: https://github.com/llvm/llvm-project/commit/8dfe60c35670a2d6d3017528de95300d797231c0.diff

LOG: AMDGPU: Set scratch_en if there is dynamic stack but no fixed stack

Added: 
    llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 63b7645e32619..d3e21f6ff6c15 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -908,7 +908,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   // anything to disable it if we know the stack isn't used here. We may still
   // have emitted code reading it to initialize scratch, but if that's unused
   // reading garbage should be OK.
-  const bool EnablePrivateSegment = ProgInfo.ScratchBlocks > 0;
+  const bool EnablePrivateSegment =
+      ProgInfo.ScratchBlocks > 0 || ProgInfo.DynamicCallStack;
   ProgInfo.ComputePGMRSrc2 =
       S_00B84C_SCRATCH_EN(EnablePrivateSegment) |
       S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |

diff  --git a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
new file mode 100644
index 0000000000000..e911eb01323f1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=GCN,COV5 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck -check-prefixes=GCN,COV4 %s
+
+@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
+
+; No stack objects, only indirect call has to enable scratch
+; GCN-LABEL: test_indirect_call:
+
+; COV5: .amdhsa_private_segment_fixed_size 0{{$}}
+; COV4: .amdhsa_private_segment_fixed_size 16384{{$}}
+
+; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
+
+; COV5: .amdhsa_uses_dynamic_stack 1
+; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+define amdgpu_kernel void @test_indirect_call() {
+  %fptr = load ptr, ptr addrspace(4) @gv.fptr0
+  call void %fptr()
+  ret void
+}


        


More information about the llvm-commits mailing list