[PATCH] D29974: [AMDGPU] Fix MaxWorkGroupsPerCU for large workgroups

Stanislav Mekhanoshin via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 14 17:15:39 PST 2017


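This revision was automatically updated to reflect the committed changes.
Closed by commit rL295134: [AMDGPU] Fix MaxWorkGroupsPerCU for large workgroups (authored by rampitec).

Summary of the change shown below: for GCN targets, the function modified in
AMDGPUBaseInfo.cpp previously returned 40 when a work group consists of a
single wave and a flat 16 otherwise. It still returns 40 for a single wave,
but for N > 1 waves per work group it now returns min(40 / N, 16). The
large-work-group-promote-alloca.ll test is updated accordingly: for
@occupancy_0 and @occupancy_max, the `alloca [5 x i32]` is now expected to be
absent under the CI check prefix and still present under the SI check prefix.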

Changed prior to commit:
  https://reviews.llvm.org/D29974?vs=88468&id=88469#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D29974

Files:
  llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
  llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll


Index: llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
@@ -69,7 +69,8 @@
 }
 
 ; ALL-LABEL: @occupancy_0(
-; ALL: alloca [5 x i32]
+; CI-NOT: alloca [5 x i32]
+; SI: alloca [5 x i32]
 define void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 {
 entry:
   %stack = alloca [5 x i32], align 4
@@ -91,7 +92,8 @@
 }
 
 ; ALL-LABEL: @occupancy_max(
-; ALL: alloca [5 x i32]
+; CI-NOT: alloca [5 x i32]
+; SI: alloca [5 x i32]
 define void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 {
 entry:
   %stack = alloca [5 x i32], align 4
Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -151,7 +151,11 @@
                                unsigned FlatWorkGroupSize) {
   if (!Features.test(FeatureGCN))
     return 8;
-  return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
+  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+  if (N == 1)
+    return 40;
+  N = 40 / N;
+  return std::min(N, 16u);
 }
 
 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D29974.88469.patch
Type: text/x-patch
Size: 1489 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170215/9a6f1b89/attachment.bin>


More information about the llvm-commits mailing list