[Openmp-commits] [openmp] fbcce33 - [OpenMP] Honor `thread_limit` value when choosing grid size
Shilei Tian via Openmp-commits
openmp-commits at lists.llvm.org
Sat Aug 26 19:17:53 PDT 2023
Author: Shilei Tian
Date: 2023-08-26T22:17:49-04:00
New Revision: fbcce337064401162be06c8bd93b99155802ec52
URL: https://github.com/llvm/llvm-project/commit/fbcce337064401162be06c8bd93b99155802ec52
DIFF: https://github.com/llvm/llvm-project/commit/fbcce337064401162be06c8bd93b99155802ec52.diff
LOG: [OpenMP] Honor `thread_limit` value when choosing grid size
D152014 introduced an optimization that favors more, smaller blocks over
fewer, larger blocks, even if the user sets `thread_limit` explicitly. This patch
changes the behavior to honor the user-specified value.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D158802
Added:
Modified:
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index 0e61a49433a6d2..bc8b41368bdf66 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -374,8 +374,9 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
KernelArgs.NumArgs, Args, Ptrs);
uint32_t NumThreads = getNumThreads(GenericDevice, KernelArgs.ThreadLimit);
- uint64_t NumBlocks = getNumBlocks(GenericDevice, KernelArgs.NumTeams,
- KernelArgs.Tripcount, NumThreads);
+ uint64_t NumBlocks =
+ getNumBlocks(GenericDevice, KernelArgs.NumTeams, KernelArgs.Tripcount,
+ NumThreads, KernelArgs.ThreadLimit[0] > 0);
if (auto Err =
printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
@@ -418,7 +419,8 @@ uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
uint32_t NumTeamsClause[3],
uint64_t LoopTripCount,
- uint32_t &NumThreads) const {
+ uint32_t &NumThreads,
+ bool IsNumThreadsFromUser) const {
assert(NumTeamsClause[1] == 0 && NumTeamsClause[2] == 0 &&
"Multi dimensional launch not supported yet.");
@@ -443,7 +445,8 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
// Honor the thread_limit clause; only lower the number of threads.
[[maybe_unused]] auto OldNumThreads = NumThreads;
- if (LoopTripCount >= DefaultNumBlocks * NumThreads) {
+ if (LoopTripCount >= DefaultNumBlocks * NumThreads ||
+ IsNumThreadsFromUser) {
// Enough parallelism for teams and threads.
TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
assert(TripCountNumBlocks >= DefaultNumBlocks &&
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
index 45104efef3b623..736e864d79f4e8 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -335,9 +335,11 @@ struct GenericKernelTy {
uint32_t ThreadLimitClause[3]) const;
/// The number of threads \p NumThreads can be adjusted by this method.
+ /// \p IsNumThreadsFromUser is true if \p NumThreads is defined by user via
+ /// thread_limit clause.
uint64_t getNumBlocks(GenericDeviceTy &GenericDevice,
uint32_t BlockLimitClause[3], uint64_t LoopTripCount,
- uint32_t &NumThreads) const;
+ uint32_t &NumThreads, bool IsNumThreadsFromUser) const;
/// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode.
bool isGenericSPMDMode() const {
More information about the Openmp-commits
mailing list