[Openmp-commits] [PATCH] D147511: [OpenMP] Fix nextgen plugin thread_limit clause bug on generic kernels.

Michael Halkenhäuser via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Sat Apr 8 08:51:15 PDT 2023


mhalk updated this revision to Diff 511895.
mhalk added a comment.

Refactored the fix such that it covers not only the '-1' case, but all negative integers.
Added a testcase, also to demonstrate the issue.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147511/new/

https://reviews.llvm.org/D147511

Files:
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/test/offloading/negative_thread_limit.cpp


Index: openmp/libomptarget/test/offloading/negative_thread_limit.cpp
===================================================================
--- /dev/null
+++ openmp/libomptarget/test/offloading/negative_thread_limit.cpp
@@ -0,0 +1,51 @@
+// RUN: %libomptarget-compilexx-run-and-check-amdgcn-amd-amdhsa \
+// RUN:   -check-prefixes=INFO,AMDGPU
+// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda \
+// RUN:   -check-prefixes=INFO,NVIDIAGPU
+
+#include <omp.h>
+#include <stdio.h>
+
+void createThreads(int threadLimit) {
+  int threadCount = 0;
+#pragma omp target teams distribute parallel for map(from : threadCount)       \
+    thread_limit(threadLimit)
+  for (int i = 0; i < 1; ++i)
+    threadCount = omp_get_num_threads();
+
+  printf("threadCount=%d\n", threadCount);
+}
+
+int main(int argc, char *argv[]) {
+  int isHost = -1;
+
+#pragma omp target map(from : isHost)
+  { isHost = omp_is_initial_device(); }
+
+  // Make sure we run on device
+  printf("Target region executed on the %s\n", isHost ? "host" : "device");
+
+  // Set the thread limit to a large negative number, such that the
+  // addition of GV_Warp_Size cannot push this into positive range
+  // thread_limit: value < -GV_Warp_Size  < 0 -- expected: GV_Default_WG_Size
+  createThreads((int)0xF0000000);
+
+  // thread_limit: value > GV_Max_WG_Size > 0 -- expected: GV_Max_WG_Size
+  // Note: on nvidia this could also be CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
+  createThreads((int)0x40000000);
+
+  // thread_limit: value == 0                 -- expected: GV_Default_WG_Size
+  createThreads(0);
+
+  return isHost;
+}
+
+// INFO: Target region executed on the device
+
+// AMDGPU: threadCount=256
+// AMDGPU: threadCount=1024
+// AMDGPU: threadCount=256
+
+// NVIDIAGPU: threadCount=128
+// NVIDIAGPU: threadCount={{1024|384}}
+// NVIDIAGPU: threadCount=128
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -296,7 +296,11 @@
                                         uint32_t ThreadLimitClause[3]) const {
   assert(ThreadLimitClause[1] == 0 && ThreadLimitClause[2] == 0 &&
          "Multi dimensional launch not supported yet.");
-  if (ThreadLimitClause[0] > 0 && isGenericMode())
+
+  // If the thread limit was passed as negative integer, use PreferredNumThreads
+  if (ThreadLimitClause[0] & (0x1 << 31))
+    ThreadLimitClause[0] = PreferredNumThreads;
+  else if (ThreadLimitClause[0] > 0 && isGenericMode())
     ThreadLimitClause[0] += GenericDevice.getWarpSize();
 
   return std::min(MaxNumThreads, (ThreadLimitClause[0] > 0)


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D147511.511895.patch
Type: text/x-patch
Size: 2807 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20230408/6e7aca19/attachment.bin>


More information about the Openmp-commits mailing list