[PATCH] D89805: AMDGPU: Lower the threshold reported for maximum stack size exceeded
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 20 09:40:50 PDT 2020
arsenm created this revision.
arsenm added reviewers: rampitec, t-tye, kerbowa.
Herald added subscribers: hiraditya, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
Herald added a project: LLVM.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
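Check the kernel's scratch size against the actual maximum supported per-workitem stack size (GCNSubtarget::MaxWaveScratchSize divided by the wavefront size), instead of only checking that it fits in 32 bits.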
https://reviews.llvm.org/D89805
Files:
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
Index: llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
+++ llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
@@ -3,12 +3,45 @@
declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture, i8, i32, i32, i1) #1
-; ERROR: error: stack size limit exceeded (4294967296) in stack_size_limit
-; GCN: ; ScratchSize: 4294967296
-define amdgpu_kernel void @stack_size_limit() #0 {
+; ERROR: error: stack size limit exceeded (131061) in stack_size_limit_wave64
+; GCN: ; ScratchSize: 131061
+define amdgpu_kernel void @stack_size_limit_wave64() #0 {
entry:
- %alloca = alloca [1073741823 x i32], align 4, addrspace(5)
- %bc = bitcast [1073741823 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
- call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %bc, i8 9, i32 1073741823, i32 1, i1 true)
+ %alloca = alloca [131057 x i8], align 1, addrspace(5)
+ %alloca.bc = bitcast [131057 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
+ call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 131057, i32 1, i1 true)
ret void
}
+
+; ERROR: error: stack size limit exceeded (262117) in stack_size_limit_wave32
+; GCN: ; ScratchSize: 262117
+define amdgpu_kernel void @stack_size_limit_wave32() #1 {
+entry:
+ %alloca = alloca [262113 x i8], align 1, addrspace(5)
+ %alloca.bc = bitcast [262113 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
+ call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 262113, i32 1, i1 true)
+ ret void
+}
+
+; ERROR-NOT: error:
+; GCN: ; ScratchSize: 131056
+define amdgpu_kernel void @max_stack_size_wave64() #0 {
+entry:
+ %alloca = alloca [131052 x i8], align 1, addrspace(5)
+ %alloca.bc = bitcast [131052 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
+ call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 131052, i32 1, i1 true)
+ ret void
+}
+
+; ERROR-NOT: error:
+; GCN: ; ScratchSize: 262112
+define amdgpu_kernel void @max_stack_size_wave32() #1 {
+entry:
+ %alloca = alloca [262108 x i8], align 1, addrspace(5)
+ %alloca.bc = bitcast [262108 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
+ call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 262108, i32 1, i1 true)
+ ret void
+}
+
+attributes #0 = { "target-cpu" = "gfx900" }
+attributes #1 = { "target-cpu" = "gfx1010" }
Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -422,10 +422,10 @@
SITargetLowering TLInfo;
SIFrameLowering FrameLowering;
+public:
// See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
-public:
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const GCNTargetMachine &TM);
~GCNSubtarget() override;
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -993,7 +993,9 @@
ProgInfo.FlatUsed = Info.UsesFlatScratch;
ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
- if (!isUInt<32>(ProgInfo.ScratchSize)) {
+ const uint64_t MaxScratchPerWorkitem =
+ GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize();
+ if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
ProgInfo.ScratchSize, DS_Error);
MF.getFunction().getContext().diagnose(DiagStackSize);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D89805.299386.patch
Type: text/x-patch
Size: 3784 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201020/447da6f2/attachment.bin>
More information about the llvm-commits
mailing list