[llvm] 8dfe60c - AMDGPU: Set scratch_en if there is dynamic stack but no fixed stack
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 4 17:51:23 PST 2023
Author: Matt Arsenault
Date: 2023-01-04T20:51:18-05:00
New Revision: 8dfe60c35670a2d6d3017528de95300d797231c0
URL: https://github.com/llvm/llvm-project/commit/8dfe60c35670a2d6d3017528de95300d797231c0
DIFF: https://github.com/llvm/llvm-project/commit/8dfe60c35670a2d6d3017528de95300d797231c0.diff
LOG: AMDGPU: Set scratch_en if there is dynamic stack but no fixed stack
Added:
llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 63b7645e32619..d3e21f6ff6c15 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -908,7 +908,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// anything to disable it if we know the stack isn't used here. We may still
// have emitted code reading it to initialize scratch, but if that's unused
// reading garbage should be OK.
- const bool EnablePrivateSegment = ProgInfo.ScratchBlocks > 0;
+ const bool EnablePrivateSegment =
+ ProgInfo.ScratchBlocks > 0 || ProgInfo.DynamicCallStack;
ProgInfo.ComputePGMRSrc2 =
S_00B84C_SCRATCH_EN(EnablePrivateSegment) |
S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
diff --git a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
new file mode 100644
index 0000000000000..e911eb01323f1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=GCN,COV5 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck -check-prefixes=GCN,COV4 %s
+
+@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
+
+; No stack objects, only indirect call has to enable scratch
+; GCN-LABEL: test_indirect_call:
+
+; COV5: .amdhsa_private_segment_fixed_size 0{{$}}
+; COV4: .amdhsa_private_segment_fixed_size 16384{{$}}
+
+; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
+
+; COV5: .amdhsa_uses_dynamic_stack 1
+; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+define amdgpu_kernel void @test_indirect_call() {
+ %fptr = load ptr, ptr addrspace(4) @gv.fptr0
+ call void %fptr()
+ ret void
+}
More information about the llvm-commits
mailing list