[Openmp-commits] [PATCH] D158802: [OpenMP] Honor `thread_limit` value when choosing grid size
Shilei Tian via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Sat Aug 26 18:30:17 PDT 2023
tianshilei1992 updated this revision to Diff 553772.
tianshilei1992 added a comment.
rebase and fix comment
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D158802/new/
https://reviews.llvm.org/D158802
Files:
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -335,9 +335,11 @@
uint32_t ThreadLimitClause[3]) const;
/// The number of threads \p NumThreads can be adjusted by this method.
+ /// \p IsNumThreadsFromUser is true if \p NumThreads is defined by user via
+ /// thread_limit clause.
uint64_t getNumBlocks(GenericDeviceTy &GenericDevice,
uint32_t BlockLimitClause[3], uint64_t LoopTripCount,
- uint32_t &NumThreads) const;
+ uint32_t &NumThreads, bool IsNumThreadsFromUser) const;
/// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode.
bool isGenericSPMDMode() const {
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -374,8 +374,9 @@
KernelArgs.NumArgs, Args, Ptrs);
uint32_t NumThreads = getNumThreads(GenericDevice, KernelArgs.ThreadLimit);
- uint64_t NumBlocks = getNumBlocks(GenericDevice, KernelArgs.NumTeams,
- KernelArgs.Tripcount, NumThreads);
+ uint64_t NumBlocks =
+ getNumBlocks(GenericDevice, KernelArgs.NumTeams, KernelArgs.Tripcount,
+ NumThreads, KernelArgs.ThreadLimit[0] > 0);
if (auto Err =
printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
@@ -418,7 +419,8 @@
uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
uint32_t NumTeamsClause[3],
uint64_t LoopTripCount,
- uint32_t &NumThreads) const {
+ uint32_t &NumThreads,
+ bool IsNumThreadsFromUser) const {
assert(NumTeamsClause[1] == 0 && NumTeamsClause[2] == 0 &&
"Multi dimensional launch not supported yet.");
@@ -443,7 +445,8 @@
// Honor the thread_limit clause; only lower the number of threads.
[[maybe_unused]] auto OldNumThreads = NumThreads;
- if (LoopTripCount >= DefaultNumBlocks * NumThreads) {
+ if (LoopTripCount >= DefaultNumBlocks * NumThreads ||
+ IsNumThreadsFromUser) {
// Enough parallelism for teams and threads.
TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
assert(TripCountNumBlocks >= DefaultNumBlocks &&
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D158802.553772.patch
Type: text/x-patch
Size: 2913 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20230827/01e8705a/attachment.bin>
More information about the Openmp-commits
mailing list