[Openmp-commits] [openmp] fbcce33 - [OpenMP] Honor `thread_limit` value when choosing grid size

Shilei Tian via Openmp-commits openmp-commits at lists.llvm.org
Sat Aug 26 19:17:53 PDT 2023


Author: Shilei Tian
Date: 2023-08-26T22:17:49-04:00
New Revision: fbcce337064401162be06c8bd93b99155802ec52

URL: https://github.com/llvm/llvm-project/commit/fbcce337064401162be06c8bd93b99155802ec52
DIFF: https://github.com/llvm/llvm-project/commit/fbcce337064401162be06c8bd93b99155802ec52.diff

LOG: [OpenMP] Honor `thread_limit` value when choosing grid size

D152014 introduced an optimization that favors more smaller blocks over
fewer larger blocks, even if user sets `thread_limit` explicitly. This patch changes
the behavior to honor user value.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D158802

Added: 
    

Modified: 
    openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
    openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index 0e61a49433a6d2..bc8b41368bdf66 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -374,8 +374,9 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
                                     KernelArgs.NumArgs, Args, Ptrs);
 
   uint32_t NumThreads = getNumThreads(GenericDevice, KernelArgs.ThreadLimit);
-  uint64_t NumBlocks = getNumBlocks(GenericDevice, KernelArgs.NumTeams,
-                                    KernelArgs.Tripcount, NumThreads);
+  uint64_t NumBlocks =
+      getNumBlocks(GenericDevice, KernelArgs.NumTeams, KernelArgs.Tripcount,
+                   NumThreads, KernelArgs.ThreadLimit[0] > 0);
 
   if (auto Err =
           printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
@@ -418,7 +419,8 @@ uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
 uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
                                        uint32_t NumTeamsClause[3],
                                        uint64_t LoopTripCount,
-                                       uint32_t &NumThreads) const {
+                                       uint32_t &NumThreads,
+                                       bool IsNumThreadsFromUser) const {
   assert(NumTeamsClause[1] == 0 && NumTeamsClause[2] == 0 &&
          "Multi dimensional launch not supported yet.");
 
@@ -443,7 +445,8 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
 
       // Honor the thread_limit clause; only lower the number of threads.
       [[maybe_unused]] auto OldNumThreads = NumThreads;
-      if (LoopTripCount >= DefaultNumBlocks * NumThreads) {
+      if (LoopTripCount >= DefaultNumBlocks * NumThreads ||
+          IsNumThreadsFromUser) {
         // Enough parallelism for teams and threads.
         TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
         assert(TripCountNumBlocks >= DefaultNumBlocks &&

diff  --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
index 45104efef3b623..736e864d79f4e8 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -335,9 +335,11 @@ struct GenericKernelTy {
                          uint32_t ThreadLimitClause[3]) const;
 
   /// The number of threads \p NumThreads can be adjusted by this method.
+  /// \p IsNumThreadsFromUser is true is \p NumThreads is defined by user via
+  /// thread_limit clause.
   uint64_t getNumBlocks(GenericDeviceTy &GenericDevice,
                         uint32_t BlockLimitClause[3], uint64_t LoopTripCount,
-                        uint32_t &NumThreads) const;
+                        uint32_t &NumThreads, bool IsNumThreadsFromUser) const;
 
   /// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode.
   bool isGenericSPMDMode() const {


        


More information about the Openmp-commits mailing list