[llvm] r295134 - [AMDGPU] Fix MaxWorkGroupsPerCU for large workgroups
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 14 17:03:59 PST 2017
Author: rampitec
Date: Tue Feb 14 19:03:59 2017
New Revision: 295134
URL: http://llvm.org/viewvc/llvm-project?rev=295134&view=rev
Log:
[AMDGPU] Fix MaxWorkGroupsPerCU for large workgroups
This patch corrects the maximum number of workgroups per CU when
workgroups are large (size greater than 128). This value feeds into the
occupancy calculation with respect to LDS size.
Differential Revision: https://reviews.llvm.org/D29974
Modified:
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=295134&r1=295133&r2=295134&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Tue Feb 14 19:03:59 2017
@@ -151,7 +151,11 @@ unsigned getMaxWorkGroupsPerCU(const Fea
unsigned FlatWorkGroupSize) {
if (!Features.test(FeatureGCN))
return 8;
- return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
+ unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ if (N == 1)
+ return 40;
+ N = 40 / N;
+ return std::min(N, 16u);
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll?rev=295134&r1=295133&r2=295134&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll Tue Feb 14 19:03:59 2017
@@ -69,7 +69,8 @@ entry:
}
; ALL-LABEL: @occupancy_0(
-; ALL: alloca [5 x i32]
+; CI-NOT: alloca [5 x i32]
+; SI: alloca [5 x i32]
define void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 {
entry:
%stack = alloca [5 x i32], align 4
@@ -91,7 +92,8 @@ entry:
}
; ALL-LABEL: @occupancy_max(
-; ALL: alloca [5 x i32]
+; CI-NOT: alloca [5 x i32]
+; SI: alloca [5 x i32]
define void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 {
entry:
%stack = alloca [5 x i32], align 4
More information about the llvm-commits mailing list