[Openmp-commits] [PATCH] D147511: [OpenMP] Fix nextgen plugin thread_limit clause bug when passing negative values.
Michael Halkenhäuser via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Wed Apr 12 04:10:27 PDT 2023
mhalk updated this revision to Diff 512769.
mhalk added a comment.
Rebase + clarifications on this patch's intent
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D147511/new/
https://reviews.llvm.org/D147511
Files:
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
openmp/libomptarget/test/offloading/negative_thread_limit.cpp
Index: openmp/libomptarget/test/offloading/negative_thread_limit.cpp
===================================================================
--- /dev/null
+++ openmp/libomptarget/test/offloading/negative_thread_limit.cpp
@@ -0,0 +1,51 @@
+// RUN: %libomptarget-compilexx-run-and-check-amdgcn-amd-amdhsa \
+// RUN: -check-prefixes=INFO,AMDGPU
+// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda \
+// RUN: -check-prefixes=INFO,NVIDIAGPU
+
+#include <omp.h>
+#include <stdio.h>
+
+void createThreads(int threadLimit) {
+ int threadCount = 0;
+#pragma omp target teams distribute parallel for map(from : threadCount) \
+ thread_limit(threadLimit)
+ for (int i = 0; i < 1; ++i)
+ threadCount = omp_get_num_threads();
+
+ printf("threadCount=%d\n", threadCount);
+}
+
+int main(int argc, char *argv[]) {
+ int isHost = -1;
+
+#pragma omp target map(from : isHost)
+ { isHost = omp_is_initial_device(); }
+
+ // Make sure we run on device
+ printf("Target region executed on the %s\n", isHost ? "host" : "device");
+
+ // Set the thread limit to a large negative number, such that the
+ // addition of GV_Warp_Size cannot push this into positive range
+ // thread_limit: value < -GV_Warp_Size < 0 -- expected: GV_Default_WG_Size
+ createThreads((int)0xF0000000);
+
+ // thread_limit: value > GV_Max_WG_Size > 0 -- expected: GV_Max_WG_Size
+ // Note: on nvidia this could also be CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
+ createThreads((int)0x40000000);
+
+ // thread_limit: value == 0 -- expected: GV_Default_WG_Size
+ createThreads(0);
+
+ return isHost;
+}
+
+// INFO: Target region executed on the device
+
+// AMDGPU: threadCount=256
+// AMDGPU: threadCount=1024
+// AMDGPU: threadCount=256
+
+// NVIDIAGPU: threadCount=128
+// NVIDIAGPU: threadCount={{1024|384}}
+// NVIDIAGPU: threadCount=128
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -296,12 +296,13 @@
uint32_t ThreadLimitClause[3]) const {
assert(ThreadLimitClause[1] == 0 && ThreadLimitClause[2] == 0 &&
"Multi dimensional launch not supported yet.");
- if (ThreadLimitClause[0] > 0 && isGenericMode())
+
+ if (static_cast<int32_t>(ThreadLimitClause[0]) <= 0)
+ ThreadLimitClause[0] = PreferredNumThreads;
+ else if (ThreadLimitClause[0] > 0 && isGenericMode())
ThreadLimitClause[0] += GenericDevice.getWarpSize();
- return std::min(MaxNumThreads, (ThreadLimitClause[0] > 0)
- ? ThreadLimitClause[0]
- : PreferredNumThreads);
+ return std::min(MaxNumThreads, ThreadLimitClause[0]);
}
uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D147511.512769.patch
Type: text/x-patch
Size: 2996 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20230412/0b521320/attachment.bin>
More information about the Openmp-commits mailing list