[Openmp-commits] [openmp] r336915 - [OPENMP, NVPTX] Fix loop boundaries calculation for dynamic loops.

Alexey Bataev via Openmp-commits openmp-commits at lists.llvm.org
Thu Jul 12 08:18:28 PDT 2018


Author: abataev
Date: Thu Jul 12 08:18:28 2018
New Revision: 336915

URL: http://llvm.org/viewvc/llvm-project?rev=336915&view=rev
Log:
[OPENMP, NVPTX] Fix loop boundaries calculation for dynamic loops.

Summary:
This patch fixes the following problems:
1. Removes unused functions from the omptarget_nvptx_ThreadPrivateContext
class and simplifies its data members.
2. Fixes the calculation of loop boundaries for dynamic loops with static
scheduling.
3. Introduces saving/restoring of the dynamic loop boundaries to support
several nested parallel dynamic loops.

Reviewers: grokos

Subscribers: guansong, kkwli0, openmp-commits

Differential Revision: https://reviews.llvm.org/D49241

Modified:
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu?rev=336915&r1=336914&r2=336915&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu Thu Jul 12 08:18:28 2018
@@ -298,7 +298,9 @@ public:
       // compute static chunk
       ST stride;
       int lastiter = 0;
-      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
+      ForStaticChunk(
+          lastiter, lb, ub, stride, chunk,
+          GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum);
       // save computed params
       omptarget_nvptx_threadPrivateContext->Chunk(tid) = chunk;
       omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = lb;
@@ -320,7 +322,9 @@ public:
       // compute static chunk
       ST stride;
       int lastiter = 0;
-      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
+      ForStaticNoChunk(
+          lastiter, lb, ub, stride, chunk,
+          GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum);
       // save computed params
       omptarget_nvptx_threadPrivateContext->Chunk(tid) = chunk;
       omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = lb;
@@ -366,10 +370,11 @@ public:
   // Support for dispatch next
 
   INLINE static int DynamicNextChunk(T &lb, T &ub, T chunkSize,
-                                     Counter &loopLowerBound,
+                                     int64_t &loopLowerBound,
                                      T loopUpperBound) {
     // calculate lower bound for all lanes in the warp
-    lb = atomicAdd(&loopLowerBound, (Counter)chunkSize);
+    lb = atomicAdd((unsigned long long *)&loopLowerBound,
+                   (unsigned long long)chunkSize);
     ub = lb + chunkSize - 1;  // Clang uses i <= ub
 
     // 3 result cases:

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h?rev=336915&r1=336914&r2=336915&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h Thu Jul 12 08:18:28 2018
@@ -192,6 +192,8 @@ public:
   INLINE void CopyFromWorkDescr(omptarget_nvptx_TaskDescr *workTaskDescr);
   INLINE void CopyConvergentParent(omptarget_nvptx_TaskDescr *parentTaskDescr,
                                    uint16_t tid, uint16_t tnum);
+  INLINE void SaveLoopData();
+  INLINE void RestoreLoopData() const;
 
 private:
   // bits for flags: (7 used, 1 free)
@@ -207,6 +209,14 @@ private:
   static const uint8_t TaskDescr_IsParConstr = 0x20;
   static const uint8_t TaskDescr_InParL2P = 0x40;
 
+  struct SavedLoopDescr_items {
+    int64_t loopUpperBound;
+    int64_t nextLowerBound;
+    int64_t chunk;
+    int64_t stride;
+    kmp_sched_t schedule;
+  } loopData;
+
   struct TaskDescr_items {
     uint8_t flags; // 6 bit used (see flag above)
     uint8_t unused;
@@ -335,16 +345,8 @@ public:
   INLINE kmp_sched_t &ScheduleType(int tid) { return schedule[tid]; }
   INLINE int64_t &Chunk(int tid) { return chunk[tid]; }
   INLINE int64_t &LoopUpperBound(int tid) { return loopUpperBound[tid]; }
-  // state for dispatch with dyn/guided
-  INLINE Counter &CurrentEvent(int tid) {
-    return currEvent_or_nextLowerBound[tid];
-  }
-  INLINE Counter &EventsNumber(int tid) { return eventsNum_or_stride[tid]; }
-  // state for dispatch with static
-  INLINE Counter &NextLowerBound(int tid) {
-    return currEvent_or_nextLowerBound[tid];
-  }
-  INLINE Counter &Stride(int tid) { return eventsNum_or_stride[tid]; }
+  INLINE int64_t &NextLowerBound(int tid) { return nextLowerBound[tid]; }
+  INLINE int64_t &Stride(int tid) { return stride[tid]; }
 
   INLINE omptarget_nvptx_TeamDescr &TeamContext() { return teamContext; }
 
@@ -373,8 +375,8 @@ private:
   int64_t chunk[MAX_THREADS_PER_TEAM];
   int64_t loopUpperBound[MAX_THREADS_PER_TEAM];
   // state for dispatch with dyn/guided OR static (never use both at a time)
-  Counter currEvent_or_nextLowerBound[MAX_THREADS_PER_TEAM];
-  Counter eventsNum_or_stride[MAX_THREADS_PER_TEAM];
+  int64_t nextLowerBound[MAX_THREADS_PER_TEAM];
+  int64_t stride[MAX_THREADS_PER_TEAM];
   // Queue to which this object must be returned.
   uint64_t SourceQueue;
 };

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h?rev=336915&r1=336914&r2=336915&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h Thu Jul 12 08:18:28 2018
@@ -125,6 +125,30 @@ INLINE void omptarget_nvptx_TaskDescr::C
   items.threadId = tid;
 }
 
+INLINE void omptarget_nvptx_TaskDescr::SaveLoopData() {
+  loopData.loopUpperBound =
+      omptarget_nvptx_threadPrivateContext->LoopUpperBound(items.threadId);
+  loopData.nextLowerBound =
+      omptarget_nvptx_threadPrivateContext->NextLowerBound(items.threadId);
+  loopData.schedule =
+      omptarget_nvptx_threadPrivateContext->ScheduleType(items.threadId);
+  loopData.chunk = omptarget_nvptx_threadPrivateContext->Chunk(items.threadId);
+  loopData.stride =
+      omptarget_nvptx_threadPrivateContext->Stride(items.threadId);
+}
+
+INLINE void omptarget_nvptx_TaskDescr::RestoreLoopData() const {
+  omptarget_nvptx_threadPrivateContext->Chunk(items.threadId) = loopData.chunk;
+  omptarget_nvptx_threadPrivateContext->LoopUpperBound(items.threadId) =
+      loopData.loopUpperBound;
+  omptarget_nvptx_threadPrivateContext->NextLowerBound(items.threadId) =
+      loopData.nextLowerBound;
+  omptarget_nvptx_threadPrivateContext->Stride(items.threadId) =
+      loopData.stride;
+  omptarget_nvptx_threadPrivateContext->ScheduleType(items.threadId) =
+      loopData.schedule;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Thread Private Context
 ////////////////////////////////////////////////////////////////////////////////

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu?rev=336915&r1=336914&r2=336915&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu Thu Jul 12 08:18:28 2018
@@ -386,6 +386,7 @@ EXTERN void __kmpc_serialized_parallel(k
 
   // get current task
   omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(threadId);
+  currTaskDescr->SaveLoopData();
 
   // allocate new task descriptor and copy value from current one, set prev to
   // it
@@ -417,6 +418,8 @@ EXTERN void __kmpc_end_serialized_parall
       threadId, currTaskDescr->GetPrevTaskDescr());
   // free
   SafeFree(currTaskDescr, (char *)"new seq parallel task");
+  currTaskDescr = getMyTopTaskDescriptor(threadId);
+  currTaskDescr->RestoreLoopData();
 }
 
 EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid) {




More information about the Openmp-commits mailing list