[Openmp-commits] [PATCH] D147511: [OpenMP] Fix nextgen plugin thread_limit clause bug on generic kernels.
Michael Halkenhäuser via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Sat Apr 8 08:51:15 PDT 2023
mhalk updated this revision to Diff 511895.
mhalk added a comment.
Refactored the fix such that it covers not only the '-1' case, but all negative integers.
Added a test case, which also demonstrates the issue.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D147511/new/
https://reviews.llvm.org/D147511
Files:
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
openmp/libomptarget/test/offloading/negative_thread_limit.cpp
Index: openmp/libomptarget/test/offloading/negative_thread_limit.cpp
===================================================================
--- /dev/null
+++ openmp/libomptarget/test/offloading/negative_thread_limit.cpp
@@ -0,0 +1,51 @@
+// RUN: %libomptarget-compilexx-run-and-check-amdgcn-amd-amdhsa \
+// RUN: -check-prefixes=INFO,AMDGPU
+// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda \
+// RUN: -check-prefixes=INFO,NVIDIAGPU
+
+#include <omp.h>
+#include <stdio.h>
+
+void createThreads(int threadLimit) {
+ int threadCount = 0;
+#pragma omp target teams distribute parallel for map(from : threadCount) \
+ thread_limit(threadLimit)
+ for (int i = 0; i < 1; ++i)
+ threadCount = omp_get_num_threads();
+
+ printf("threadCount=%d\n", threadCount);
+}
+
+int main(int argc, char *argv[]) {
+ int isHost = -1;
+
+#pragma omp target map(from : isHost)
+ { isHost = omp_is_initial_device(); }
+
+ // Make sure we run on device
+ printf("Target region executed on the %s\n", isHost ? "host" : "device");
+
+ // Set the thread limit to a large negative number, such that the
+ // addition of GV_Warp_Size cannot push this into positive range
+ // thread_limit: value < -GV_Warp_Size < 0 -- expected: GV_Default_WG_Size
+ createThreads((int)0xF0000000);
+
+ // thread_limit: value > GV_Max_WG_Size > 0 -- expected: GV_Max_WG_Size
+ // Note: on nvidia this could also be CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
+ createThreads((int)0x40000000);
+
+ // thread_limit: value == 0 -- expected: GV_Default_WG_Size
+ createThreads(0);
+
+ return isHost;
+}
+
+// INFO: Target region executed on the device
+
+// AMDGPU: threadCount=256
+// AMDGPU: threadCount=1024
+// AMDGPU: threadCount=256
+
+// NVIDIAGPU: threadCount=128
+// NVIDIAGPU: threadCount={{1024|384}}
+// NVIDIAGPU: threadCount=128
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -296,7 +296,11 @@
uint32_t ThreadLimitClause[3]) const {
assert(ThreadLimitClause[1] == 0 && ThreadLimitClause[2] == 0 &&
"Multi dimensional launch not supported yet.");
- if (ThreadLimitClause[0] > 0 && isGenericMode())
+
+ // If the thread limit was passed as negative integer, use PreferredNumThreads
+ if (ThreadLimitClause[0] & (0x1 << 31))
+ ThreadLimitClause[0] = PreferredNumThreads;
+ else if (ThreadLimitClause[0] > 0 && isGenericMode())
ThreadLimitClause[0] += GenericDevice.getWarpSize();
return std::min(MaxNumThreads, (ThreadLimitClause[0] > 0)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D147511.511895.patch
Type: text/x-patch
Size: 2807 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20230408/6e7aca19/attachment.bin>
More information about the Openmp-commits
mailing list