[Openmp-commits] [PATCH] D147511: [OpenMP] Fix nextgen plugin thread_limit clause bug when passing negative values.

Michael Halkenhäuser via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Wed Apr 12 04:10:27 PDT 2023


mhalk updated this revision to Diff 512769.
mhalk added a comment.

Rebased the patch and added clarifications of its intent.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147511/new/

https://reviews.llvm.org/D147511

Files:
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/test/offloading/negative_thread_limit.cpp


Index: openmp/libomptarget/test/offloading/negative_thread_limit.cpp
===================================================================
--- /dev/null
+++ openmp/libomptarget/test/offloading/negative_thread_limit.cpp
@@ -0,0 +1,51 @@
+// RUN: %libomptarget-compilexx-run-and-check-amdgcn-amd-amdhsa \
+// RUN:   -check-prefixes=INFO,AMDGPU
+// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda \
+// RUN:   -check-prefixes=INFO,NVIDIAGPU
+
+#include <omp.h>
+#include <stdio.h>
+
+void createThreads(int threadLimit) {
+  int threadCount = 0;
+#pragma omp target teams distribute parallel for map(from : threadCount)       \
+    thread_limit(threadLimit)
+  for (int i = 0; i < 1; ++i)
+    threadCount = omp_get_num_threads();
+
+  printf("threadCount=%d\n", threadCount);
+}
+
+int main(int argc, char *argv[]) {
+  int isHost = -1;
+
+#pragma omp target map(from : isHost)
+  { isHost = omp_is_initial_device(); }
+
+  // Make sure we run on device
+  printf("Target region executed on the %s\n", isHost ? "host" : "device");
+
+  // Set the thread limit to a large negative number, such that the
+  // addition of GV_Warp_Size cannot push this into positive range
+  // thread_limit: value < -GV_Warp_Size  < 0 -- expected: GV_Default_WG_Size
+  createThreads((int)0xF0000000);
+
+  // thread_limit: value > GV_Max_WG_Size > 0 -- expected: GV_Max_WG_Size
+  // Note: on nvidia this could also be CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
+  createThreads((int)0x40000000);
+
+  // thread_limit: value == 0                 -- expected: GV_Default_WG_Size
+  createThreads(0);
+
+  return isHost;
+}
+
+// INFO: Target region executed on the device
+
+// AMDGPU: threadCount=256
+// AMDGPU: threadCount=1024
+// AMDGPU: threadCount=256
+
+// NVIDIAGPU: threadCount=128
+// NVIDIAGPU: threadCount={{1024|384}}
+// NVIDIAGPU: threadCount=128
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -296,12 +296,13 @@
                                         uint32_t ThreadLimitClause[3]) const {
   assert(ThreadLimitClause[1] == 0 && ThreadLimitClause[2] == 0 &&
          "Multi dimensional launch not supported yet.");
-  if (ThreadLimitClause[0] > 0 && isGenericMode())
+
+  if (static_cast<int32_t>(ThreadLimitClause[0]) <= 0)
+    ThreadLimitClause[0] = PreferredNumThreads;
+  else if (ThreadLimitClause[0] > 0 && isGenericMode())
     ThreadLimitClause[0] += GenericDevice.getWarpSize();
 
-  return std::min(MaxNumThreads, (ThreadLimitClause[0] > 0)
-                                     ? ThreadLimitClause[0]
-                                     : PreferredNumThreads);
+  return std::min(MaxNumThreads, ThreadLimitClause[0]);
 }
 
 uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D147511.512769.patch
Type: text/x-patch
Size: 2996 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20230412/0b521320/attachment.bin>


More information about the Openmp-commits mailing list