[Openmp-commits] [PATCH] D158802: [OpenMP] Honor `thread_limit` value when choosing grid size

Shilei Tian via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Thu Aug 24 20:10:56 PDT 2023


tianshilei1992 updated this revision to Diff 553337.
tianshilei1992 marked 3 inline comments as done.
tianshilei1992 added a comment.

rebase and fix comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158802/new/

https://reviews.llvm.org/D158802

Files:
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h


Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -335,9 +335,11 @@
                          uint32_t ThreadLimitClause[3]) const;
 
   /// The number of threads \p NumThreads can be adjusted by this method.
+  /// \p IsNumThreadsFromUser is true is \p NumThreads is defined by user via
+  /// thread_limit clause.
   uint64_t getNumBlocks(GenericDeviceTy &GenericDevice,
                         uint32_t BlockLimitClause[3], uint64_t LoopTripCount,
-                        uint32_t &NumThreads) const;
+                        uint32_t &NumThreads, bool IsNumThreadsFromUser) const;
 
   /// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode.
   bool isGenericSPMDMode() const {
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -327,8 +327,9 @@
                                     KernelArgs.NumArgs, Args, Ptrs);
 
   uint32_t NumThreads = getNumThreads(GenericDevice, KernelArgs.ThreadLimit);
-  uint64_t NumBlocks = getNumBlocks(GenericDevice, KernelArgs.NumTeams,
-                                    KernelArgs.Tripcount, NumThreads);
+  uint64_t NumBlocks =
+      getNumBlocks(GenericDevice, KernelArgs.NumTeams, KernelArgs.Tripcount,
+                   NumThreads, KernelArgs.ThreadLimit[0] > 0);
 
   if (auto Err =
           printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
@@ -371,7 +372,8 @@
 uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
                                        uint32_t NumTeamsClause[3],
                                        uint64_t LoopTripCount,
-                                       uint32_t &NumThreads) const {
+                                       uint32_t &NumThreads,
+                                       bool IsNumThreadsFromUser) const {
   assert(NumTeamsClause[1] == 0 && NumTeamsClause[2] == 0 &&
          "Multi dimensional launch not supported yet.");
 
@@ -396,7 +398,8 @@
 
       // Honor the thread_limit clause; only lower the number of threads.
       [[maybe_unused]] auto OldNumThreads = NumThreads;
-      if (LoopTripCount >= DefaultNumBlocks * NumThreads) {
+      if (LoopTripCount >= DefaultNumBlocks * NumThreads ||
+          IsNumThreadsFromUser) {
         // Enough parallelism for teams and threads.
         TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
         assert(TripCountNumBlocks >= DefaultNumBlocks &&
@@ -417,7 +420,8 @@
                "Expected sufficient inner parallelism.");
         TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
       } else {
-        // Not enough parallelism for teams and threads, limit both.
+        // Not enough parallelism for teams and threads, limit both iff
+        // NumThreads value is not from user.
         NumThreads = std::min(NumThreads, MinThreads);
         TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
       }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D158802.553337.patch
Type: text/x-patch
Size: 3403 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20230825/c0ed64a4/attachment.bin>


More information about the Openmp-commits mailing list