[PATCH] D29974: [AMDGPU] Fix MaxWorkGroupsPerCU for large workgroups
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 14 17:15:39 PST 2017
This revision was automatically updated to reflect the committed changes.
Closed by commit rL295134: [AMDGPU] Fix MaxWorkGroupsPerCU for large workgroups (authored by rampitec).
Changed prior to commit:
https://reviews.llvm.org/D29974?vs=88468&id=88469#toc
Repository:
rL LLVM
https://reviews.llvm.org/D29974
Files:
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
Index: llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
@@ -69,7 +69,8 @@
}
; ALL-LABEL: @occupancy_0(
-; ALL: alloca [5 x i32]
+; CI-NOT: alloca [5 x i32]
+; SI: alloca [5 x i32]
define void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 {
entry:
%stack = alloca [5 x i32], align 4
@@ -91,7 +92,8 @@
}
; ALL-LABEL: @occupancy_max(
-; ALL: alloca [5 x i32]
+; CI-NOT: alloca [5 x i32]
+; SI: alloca [5 x i32]
define void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 {
entry:
%stack = alloca [5 x i32], align 4
Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -151,7 +151,11 @@
unsigned FlatWorkGroupSize) {
if (!Features.test(FeatureGCN))
return 8;
- return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
+ unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ if (N == 1)
+ return 40;
+ N = 40 / N;
+ return std::min(N, 16u);
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D29974.88469.patch
Type: text/x-patch
Size: 1489 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170215/9a6f1b89/attachment.bin>
More information about the llvm-commits mailing list