[Openmp-commits] [PATCH] D61526: [OPENMP][NVPTX]Improve thread limit counter, NFC.

Alexey Bataev via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Fri May 3 11:54:16 PDT 2019


ABataev created this revision.
ABataev added reviewers: grokos, gtbercea, kkwli0.
Herald added subscribers: jdoerfert, guansong.
Herald added a project: OpenMP.

Patch improves performance of the full runtime mode by moving
thread-limit counter to the shared memory. It also allows to save
global memory.


Repository:
  rOMP OpenMP

https://reviews.llvm.org/D61526

Files:
  libomptarget/deviceRTLs/nvptx/src/libcall.cu
  libomptarget/deviceRTLs/nvptx/src/omp_data.cu
  libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
  libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  libomptarget/deviceRTLs/nvptx/src/parallel.cu


Index: libomptarget/deviceRTLs/nvptx/src/parallel.cu
===================================================================
--- libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -249,9 +249,8 @@
   uint16_t &NumThreadsClause =
       omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(threadId);
 
-  uint16_t NumThreads =
-      determineNumberOfThreads(NumThreadsClause, currTaskDescr->NThreads(),
-                               currTaskDescr->ThreadLimit());
+  uint16_t NumThreads = determineNumberOfThreads(
+      NumThreadsClause, currTaskDescr->NThreads(), threadLimit);
 
   if (NumThreadsClause != 0) {
     // Reset request to avoid propagating to successive #parallel
Index: libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
===================================================================
--- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -165,7 +165,6 @@
   INLINE int IsTaskConstruct() const { return !IsParallelConstruct(); }
   // methods for other fields
   INLINE uint16_t &NThreads() { return items.nthreads; }
-  INLINE uint16_t &ThreadLimit() { return items.threadlimit; }
   INLINE uint16_t &ThreadId() { return items.threadId; }
   INLINE uint16_t &ThreadsInTeam() { return items.threadsInTeam; }
   INLINE uint64_t &RuntimeChunkSize() { return items.runtimeChunkSize; }
@@ -213,7 +212,6 @@
     uint8_t flags; // 6 bit used (see flag above)
     uint8_t unused;
     uint16_t nthreads;         // thread num for subsequent parallel regions
-    uint16_t threadlimit;      // thread limit ICV
     uint16_t threadId;         // thread id
     uint16_t threadsInTeam;    // threads in current team
     uint64_t runtimeChunkSize; // runtime chunk size
@@ -408,6 +406,7 @@
 extern __device__ __shared__ uint32_t usedSlotIdx;
 extern __device__ __shared__ uint8_t
     parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
+extern __device__ __shared__ uint16_t threadLimit;
 extern __device__ __shared__
     omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
 
Index: libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
===================================================================
--- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
+++ libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
@@ -74,7 +74,7 @@
   omptarget_nvptx_TaskDescr *currTaskDescr =
       omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
   currTaskDescr->NThreads() = GetNumberOfWorkersInTeam();
-  currTaskDescr->ThreadLimit() = ThreadLimit;
+  threadLimit = ThreadLimit;
 }
 
 EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized) {
@@ -139,7 +139,6 @@
   ASSERT0(LT_FUSSY, newTaskDescr, "expected a task descr");
   newTaskDescr->InitLevelOneTaskDescr(ThreadLimit,
                                       currTeamDescr.LevelZeroTaskDescr());
-  newTaskDescr->ThreadLimit() = ThreadLimit;
   // install new top descriptor
   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                              newTaskDescr);
Index: libomptarget/deviceRTLs/nvptx/src/omp_data.cu
===================================================================
--- libomptarget/deviceRTLs/nvptx/src/omp_data.cu
+++ libomptarget/deviceRTLs/nvptx/src/omp_data.cu
@@ -32,7 +32,7 @@
 __device__ __shared__ uint32_t usedSlotIdx;
 
 __device__ __shared__ uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
-
+__device__ __shared__ uint16_t threadLimit;
 // Pointer to this team's OpenMP state object
 __device__ __shared__
     omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
Index: libomptarget/deviceRTLs/nvptx/src/libcall.cu
===================================================================
--- libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -70,10 +70,7 @@
 EXTERN int omp_get_thread_limit(void) {
   if (isSPMDMode())
     return GetNumberOfThreadsInBlock();
-  // per contention group.. meaning threads in current team
-  omptarget_nvptx_TaskDescr *currTaskDescr =
-      getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false);
-  int rc = currTaskDescr->ThreadLimit();
+  int rc = threadLimit;
   PRINT(LD_IO, "call omp_get_thread_limit() return %d\n", rc);
   return rc;
 }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D61526.198058.patch
Type: text/x-patch
Size: 4383 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20190503/60d336b0/attachment-0001.bin>


More information about the Openmp-commits mailing list