[Openmp-commits] [openmp] r360457 - [OPENMP][NVPTX]Improve number of threads counter, NFC.

Alexey Bataev via Openmp-commits openmp-commits at lists.llvm.org
Fri May 10 11:56:06 PDT 2019


Author: abataev
Date: Fri May 10 11:56:05 2019
New Revision: 360457

URL: http://llvm.org/viewvc/llvm-project?rev=360457&view=rev
Log:
[OPENMP][NVPTX]Improve number of threads counter, NFC.

Summary:
Patch improves performance of the full runtime mode by moving
number-of-threads counter to the shared memory. It also allows to save
global memory.

Reviewers: grokos, gtbercea, kkwli0

Subscribers: guansong, jfb, jdoerfert, openmp-commits, caomhin

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D61785

Modified:
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/support.h
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu?rev=360457&r1=360456&r2=360457&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu Fri May 10 11:56:05 2019
@@ -45,9 +45,7 @@ EXTERN void omp_set_num_threads(int num)
 }
 
 EXTERN int omp_get_num_threads(void) {
-  bool isSPMDExecutionMode = isSPMDMode();
-  int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode);
-  int rc = GetNumberOfOmpThreads(tid, isSPMDExecutionMode);
+  int rc = GetNumberOfOmpThreads(isSPMDMode());
   PRINT(LD_IO, "call omp_get_num_threads() return %d\n", rc);
   return rc;
 }
@@ -156,10 +154,7 @@ EXTERN int omp_get_ancestor_thread_num(i
   int rc = -1;
   // If level is 0 or all parallel regions are not active - return 0.
   unsigned parLevel = parallelLevel[GetWarpId()];
-  if (level == 0 || (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL &&
-                     level <= parLevel)) {
-    rc = 0;
-  } else if (level > 0) {
+  if (level == 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL) {
     int totLevel = omp_get_level();
     if (level <= totLevel) {
       omptarget_nvptx_TaskDescr *currTaskDescr =
@@ -179,8 +174,7 @@ EXTERN int omp_get_ancestor_thread_num(i
                 (currTaskDescr->IsParallelConstruct() ? "par" : "task"),
                 (int)currTaskDescr->InParallelRegion(), (int)sched,
                 currTaskDescr->RuntimeChunkSize(),
-                (int)currTaskDescr->ThreadId(),
-                (int)currTaskDescr->ThreadsInTeam(),
+                (int)currTaskDescr->ThreadId(), (int)threadsInTeam,
                 (int)currTaskDescr->NThreads());
         }
 
@@ -196,6 +190,12 @@ EXTERN int omp_get_ancestor_thread_num(i
       } while (currTaskDescr);
       ASSERT0(LT_FUSSY, !steps, "expected to find all steps");
     }
+  } else if (level == 0 ||
+             (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL &&
+              level <= parLevel) ||
+             (level > 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL &&
+              level <= (parLevel - OMP_ACTIVE_PARALLEL_LEVEL))) {
+    rc = 0;
   }
   PRINT(LD_IO, "call omp_get_ancestor_thread_num(level %d) returns %d\n", level,
         rc)
@@ -208,30 +208,14 @@ EXTERN int omp_get_team_size(int level)
   int rc = -1;
   unsigned parLevel = parallelLevel[GetWarpId()];
   // If level is 0 or all parallel regions are not active - return 1.
-  if (level == 0 || (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL &&
-                     level <= parLevel)) {
+  if (level == 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL) {
+    rc = threadsInTeam;
+  } else if (level == 0 ||
+             (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL &&
+              level <= parLevel) ||
+             (level > 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL &&
+              level <= (parLevel - OMP_ACTIVE_PARALLEL_LEVEL))) {
     rc = 1;
-  } else if (level > 0) {
-    int totLevel = omp_get_level();
-    if (level <= totLevel) {
-      omptarget_nvptx_TaskDescr *currTaskDescr =
-          getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false);
-      int steps = totLevel - level;
-      ASSERT0(LT_FUSSY, currTaskDescr,
-              "do not expect fct to be called in a non-active thread");
-      do {
-        if (currTaskDescr->IsParallelConstruct()) {
-          if (!steps) {
-            // found the level
-            rc = currTaskDescr->ThreadsInTeam();
-            break;
-          }
-          steps--;
-        }
-        currTaskDescr = currTaskDescr->GetPrevTaskDescr();
-      } while (currTaskDescr);
-      ASSERT0(LT_FUSSY, !steps, "expected to find all steps");
-    }
   }
   PRINT(LD_IO, "call omp_get_team_size(level %d) returns %d\n", level, rc)
   return rc;

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu?rev=360457&r1=360456&r2=360457&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu Fri May 10 11:56:05 2019
@@ -99,12 +99,9 @@ public:
     // When IsRuntimeUninitialized is true, we assume that the caller is
     // in an L0 parallel region and that all worker threads participate.
 
-    int tid = GetLogicalThreadIdInBlock(IsSPMDExecutionMode);
-
     // Assume we are in teams region or that we use a single block
     // per target region
-    ST numberOfActiveOMPThreads =
-        GetNumberOfOmpThreads(tid, IsSPMDExecutionMode);
+    ST numberOfActiveOMPThreads = GetNumberOfOmpThreads(IsSPMDExecutionMode);
 
     // All warps that are in excess of the maximum requested, do
     // not execute the loop
@@ -212,7 +209,7 @@ public:
     }
     int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
     omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(tid);
-    T tnum = currTaskDescr->ThreadsInTeam();
+    T tnum = GetNumberOfOmpThreads(checkSPMDMode(loc));
     T tripCount = ub - lb + 1; // +1 because ub is inclusive
     ASSERT0(LT_FUSSY, threadId < tnum,
             "current thread is not needed here; error");
@@ -455,7 +452,7 @@ public:
 
     // automatically selects thread or warp ID based on selected implementation
     int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
-    ASSERT0(LT_FUSSY, gtid < GetNumberOfOmpThreads(tid, checkSPMDMode(loc)),
+    ASSERT0(LT_FUSSY, gtid < GetNumberOfOmpThreads(checkSPMDMode(loc)),
             "current thread is not needed here; error");
     // retrieve schedule
     kmp_sched_t schedule =
@@ -509,7 +506,7 @@ public:
     PRINT(LD_LOOP,
           "Got sched: active %d, total %d: lb %lld, ub %lld, stride = %lld, "
           "last %d\n",
-          (int)GetNumberOfOmpThreads(tid, isSPMDMode()),
+          (int)GetNumberOfOmpThreads(isSPMDMode()),
           (int)GetNumberOfWorkersInTeam(), (long long)*plower,
           (long long)*pupper, (long long)*pstride, (int)*plast);
     return DISPATCH_NOTFINISHED;
@@ -782,8 +779,7 @@ EXTERN void __kmpc_reduce_conditional_la
           "Expected non-SPMD mode + initialized runtime.");
 
   omptarget_nvptx_TeamDescr &teamDescr = getMyTeamDescriptor();
-  int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
-  uint32_t NumThreads = GetNumberOfOmpThreads(tid, checkSPMDMode(loc));
+  uint32_t NumThreads = GetNumberOfOmpThreads(checkSPMDMode(loc));
   uint64_t *Buffer = teamDescr.getLastprivateIterBuffer();
   for (unsigned i = 0; i < varNum; i++) {
     // Reset buffer.

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu?rev=360457&r1=360456&r2=360457&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu Fri May 10 11:56:05 2019
@@ -33,6 +33,7 @@ __device__ __shared__ uint32_t usedSlotI
 
 __device__ __shared__ uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
 __device__ __shared__ uint16_t threadLimit;
+__device__ __shared__ uint16_t threadsInTeam;
 // Pointer to this team's OpenMP state object
 __device__ __shared__
     omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu?rev=360457&r1=360456&r2=360457&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu Fri May 10 11:56:05 2019
@@ -137,8 +137,7 @@ EXTERN void __kmpc_spmd_kernel_init(int
   omptarget_nvptx_TaskDescr *newTaskDescr =
       omptarget_nvptx_threadPrivateContext->Level1TaskDescr(threadId);
   ASSERT0(LT_FUSSY, newTaskDescr, "expected a task descr");
-  newTaskDescr->InitLevelOneTaskDescr(ThreadLimit,
-                                      currTeamDescr.LevelZeroTaskDescr());
+  newTaskDescr->InitLevelOneTaskDescr(currTeamDescr.LevelZeroTaskDescr());
   // install new top descriptor
   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                              newTaskDescr);
@@ -147,7 +146,7 @@ EXTERN void __kmpc_spmd_kernel_init(int
   PRINT(LD_PAR,
         "thread will execute parallel region with id %d in a team of "
         "%d threads\n",
-        (int)newTaskDescr->ThreadId(), (int)newTaskDescr->ThreadsInTeam());
+        (int)newTaskDescr->ThreadId(), (int)ThreadLimit);
 
   if (RequiresDataSharing && GetLaneId() == 0) {
     // Warp master innitializes data sharing environment.

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h?rev=360457&r1=360456&r2=360457&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h Fri May 10 11:56:05 2019
@@ -166,7 +166,6 @@ public:
   // methods for other fields
   INLINE uint16_t &NThreads() { return items.nthreads; }
   INLINE uint16_t &ThreadId() { return items.threadId; }
-  INLINE uint16_t &ThreadsInTeam() { return items.threadsInTeam; }
   INLINE uint64_t &RuntimeChunkSize() { return items.runtimeChunkSize; }
   INLINE omptarget_nvptx_TaskDescr *GetPrevTaskDescr() const { return prev; }
   INLINE void SetPrevTaskDescr(omptarget_nvptx_TaskDescr *taskDescr) {
@@ -174,14 +173,12 @@ public:
   }
   // init & copy
   INLINE void InitLevelZeroTaskDescr(bool isSPMDExecutionMode);
-  INLINE void InitLevelOneTaskDescr(uint16_t tnum,
-                                    omptarget_nvptx_TaskDescr *parentTaskDescr);
+  INLINE void InitLevelOneTaskDescr(omptarget_nvptx_TaskDescr *parentTaskDescr);
   INLINE void Copy(omptarget_nvptx_TaskDescr *sourceTaskDescr);
   INLINE void CopyData(omptarget_nvptx_TaskDescr *sourceTaskDescr);
   INLINE void CopyParent(omptarget_nvptx_TaskDescr *parentTaskDescr);
   INLINE void CopyForExplicitTask(omptarget_nvptx_TaskDescr *parentTaskDescr);
-  INLINE void CopyToWorkDescr(omptarget_nvptx_TaskDescr *masterTaskDescr,
-                              uint16_t tnum);
+  INLINE void CopyToWorkDescr(omptarget_nvptx_TaskDescr *masterTaskDescr);
   INLINE void CopyFromWorkDescr(omptarget_nvptx_TaskDescr *workTaskDescr);
   INLINE void CopyConvergentParent(omptarget_nvptx_TaskDescr *parentTaskDescr,
                                    uint16_t tid, uint16_t tnum);
@@ -213,7 +210,6 @@ private:
     uint8_t unused;
     uint16_t nthreads;         // thread num for subsequent parallel regions
     uint16_t threadId;         // thread id
-    uint16_t threadsInTeam;    // threads in current team
     uint64_t runtimeChunkSize; // runtime chunk size
   } items;
   omptarget_nvptx_TaskDescr *prev;
@@ -407,6 +403,7 @@ extern __device__ __shared__ uint32_t us
 extern __device__ __shared__ uint8_t
     parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
 extern __device__ __shared__ uint16_t threadLimit;
+extern __device__ __shared__ uint16_t threadsInTeam;
 extern __device__ __shared__
     omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
 

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h?rev=360457&r1=360456&r2=360457&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h Fri May 10 11:56:05 2019
@@ -42,14 +42,13 @@ omptarget_nvptx_TaskDescr::InitLevelZero
   items.nthreads = GetNumberOfProcsInTeam(isSPMDExecutionMode);
   ;                                // threads: whatever was alloc by kernel
   items.threadId = 0;         // is master
-  items.threadsInTeam = 1;    // sequential
   items.runtimeChunkSize = 1; // prefered chunking statik with chunk 1
 }
 
 // This is called when all threads are started together in SPMD mode.
 // OMP directives include target parallel, target distribute parallel for, etc.
 INLINE void omptarget_nvptx_TaskDescr::InitLevelOneTaskDescr(
-    uint16_t tnum, omptarget_nvptx_TaskDescr *parentTaskDescr) {
+    omptarget_nvptx_TaskDescr *parentTaskDescr) {
   // slow method
   // flag:
   //   default sched is static,
@@ -61,7 +60,6 @@ INLINE void omptarget_nvptx_TaskDescr::I
   items.nthreads = 0; // # threads for subsequent parallel region
   items.threadId =
       GetThreadIdInBlock(); // get ids from cuda (only called for 1st level)
-  items.threadsInTeam = tnum;
   items.runtimeChunkSize = 1; // prefered chunking statik with chunk 1
   prev = parentTaskDescr;
 }
@@ -91,12 +89,11 @@ INLINE void omptarget_nvptx_TaskDescr::C
 }
 
 INLINE void omptarget_nvptx_TaskDescr::CopyToWorkDescr(
-    omptarget_nvptx_TaskDescr *masterTaskDescr, uint16_t tnum) {
+    omptarget_nvptx_TaskDescr *masterTaskDescr) {
   CopyParent(masterTaskDescr);
   // overrwrite specific items;
   items.flags |=
       TaskDescr_InPar | TaskDescr_IsParConstr; // set flag to parallel
-  items.threadsInTeam = tnum;             // set number of threads
 }
 
 INLINE void omptarget_nvptx_TaskDescr::CopyFromWorkDescr(
@@ -121,7 +118,6 @@ INLINE void omptarget_nvptx_TaskDescr::C
     omptarget_nvptx_TaskDescr *parentTaskDescr, uint16_t tid, uint16_t tnum) {
   CopyParent(parentTaskDescr);
   items.flags |= TaskDescr_InParL2P; // In L2+ parallelism
-  items.threadsInTeam = tnum;        // set number of threads
   items.threadId = tid;
 }
 

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu?rev=360457&r1=360456&r2=360457&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu Fri May 10 11:56:05 2019
@@ -264,7 +264,8 @@ EXTERN void __kmpc_kernel_prepare_parall
 
   // Set number of threads on work descriptor.
   omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
-  workDescr.WorkTaskDescr()->CopyToWorkDescr(currTaskDescr, NumThreads);
+  workDescr.WorkTaskDescr()->CopyToWorkDescr(currTaskDescr);
+  threadsInTeam = NumThreads;
 }
 
 // All workers call this function.  Deactivate those not needed.
@@ -294,7 +295,7 @@ EXTERN bool __kmpc_kernel_parallel(void
   // Set to true for workers participating in the parallel region.
   bool isActive = false;
   // Initialize state for active threads.
-  if (threadId < workDescr.WorkTaskDescr()->ThreadsInTeam()) {
+  if (threadId < threadsInTeam) {
     // init work descriptor from workdesccr
     omptarget_nvptx_TaskDescr *newTaskDescr =
         omptarget_nvptx_threadPrivateContext->Level1TaskDescr(threadId);
@@ -310,7 +311,7 @@ EXTERN bool __kmpc_kernel_parallel(void
           (int)newTaskDescr->ThreadId(), (int)newTaskDescr->NThreads());
 
     isActive = true;
-    IncParallelLevel(workDescr.WorkTaskDescr()->ThreadsInTeam() != 1);
+    IncParallelLevel(threadsInTeam != 1);
   }
 
   return isActive;
@@ -328,7 +329,7 @@ EXTERN void __kmpc_kernel_end_parallel()
   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(
       threadId, currTaskDescr->GetPrevTaskDescr());
 
-  DecParallelLevel(currTaskDescr->ThreadsInTeam() != 1);
+  DecParallelLevel(threadsInTeam != 1);
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -367,7 +368,6 @@ EXTERN void __kmpc_serialized_parallel(k
   // - each thread becomes ID 0 in its serialized parallel, and
   // - there is only one thread per team
   newTaskDescr->ThreadId() = 0;
-  newTaskDescr->ThreadsInTeam() = 1;
 
   // set new task descriptor as top
   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu?rev=360457&r1=360456&r2=360457&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu Fri May 10 11:56:05 2019
@@ -20,8 +20,7 @@
 EXTERN
 int32_t __gpu_block_reduce() {
   bool isSPMDExecutionMode = isSPMDMode();
-  int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode);
-  int nt = GetNumberOfOmpThreads(tid, isSPMDExecutionMode);
+  int nt = GetNumberOfOmpThreads(isSPMDExecutionMode);
   if (nt != blockDim.x)
     return 0;
   unsigned tnum = __ACTIVEMASK();
@@ -39,7 +38,7 @@ int32_t __kmpc_reduce_gpu(kmp_Ident *loc
   omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(threadId);
   int numthread;
   if (currTaskDescr->IsParallelConstruct()) {
-    numthread = GetNumberOfOmpThreads(threadId, checkSPMDMode(loc));
+    numthread = GetNumberOfOmpThreads(checkSPMDMode(loc));
   } else {
     numthread = GetNumberOfOmpTeams();
   }
@@ -147,8 +146,7 @@ static int32_t nvptx_parallel_reduce_now
     kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct,
     bool isSPMDExecutionMode, bool isRuntimeUninitialized) {
   uint32_t BlockThreadId = GetLogicalThreadIdInBlock(isSPMDExecutionMode);
-  uint32_t NumThreads =
-      GetNumberOfOmpThreads(BlockThreadId, isSPMDExecutionMode);
+  uint32_t NumThreads = GetNumberOfOmpThreads(isSPMDExecutionMode);
   if (NumThreads == 1)
     return 1;
   /*
@@ -279,9 +277,8 @@ static int32_t nvptx_teams_reduce_nowait
   // In generic mode only the team master participates in the teams
   // reduction because the workers are waiting for parallel work.
   uint32_t NumThreads =
-      isSPMDExecutionMode
-          ? GetNumberOfOmpThreads(ThreadId, /*isSPMDExecutionMode=*/true)
-          : /*Master thread only*/ 1;
+      isSPMDExecutionMode ? GetNumberOfOmpThreads(/*isSPMDExecutionMode=*/true)
+                          : /*Master thread only*/ 1;
   uint32_t TeamId = GetBlockIdInKernel();
   uint32_t NumTeams = GetNumberOfBlocksInKernel();
   __shared__ volatile bool IsLastTeam;
@@ -473,9 +470,8 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce
   // In generic mode only the team master participates in the teams
   // reduction because the workers are waiting for parallel work.
   uint32_t NumThreads =
-      checkSPMDMode(loc)
-          ? GetNumberOfOmpThreads(ThreadId, /*isSPMDExecutionMode=*/true)
-          : /*Master thread only*/ 1;
+      checkSPMDMode(loc) ? GetNumberOfOmpThreads(/*isSPMDExecutionMode=*/true)
+                         : /*Master thread only*/ 1;
   uint32_t TeamId = GetBlockIdInKernel();
   uint32_t NumTeams = GetNumberOfBlocksInKernel();
   __shared__ unsigned Bound;

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/support.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/support.h?rev=360457&r1=360456&r2=360457&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/support.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/support.h Fri May 10 11:56:05 2019
@@ -54,8 +54,7 @@ INLINE int GetOmpThreadId(int threadId,
 INLINE int GetOmpTeamId();                              // omp_team_num
 
 // get OpenMP number of threads and team
-INLINE int GetNumberOfOmpThreads(int threadId,
-                                 bool isSPMDExecutionMode); // omp_num_threads
+INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode); // omp_num_threads
 INLINE int GetNumberOfOmpTeams();                           // omp_num_teams
 
 // get OpenMP number of procs

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h?rev=360457&r1=360456&r2=360457&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h Fri May 10 11:56:05 2019
@@ -165,18 +165,16 @@ INLINE int GetOmpThreadId(int threadId,
   return rc;
 }
 
-INLINE int GetNumberOfOmpThreads(int threadId, bool isSPMDExecutionMode) {
+INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
   // omp_num_threads
   int rc;
-  if ((parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1)) > 1) {
+  int Level = parallelLevel[GetWarpId()];
+  if (Level != OMP_ACTIVE_PARALLEL_LEVEL + 1) {
     rc = 1;
   } else if (isSPMDExecutionMode) {
     rc = GetNumberOfThreadsInBlock();
   } else {
-    omptarget_nvptx_TaskDescr *currTaskDescr =
-        omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
-    ASSERT0(LT_FUSSY, currTaskDescr, "expected a top task descr");
-    rc = currTaskDescr->ThreadsInTeam();
+    rc = threadsInTeam;
   }
 
   return rc;

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu?rev=360457&r1=360456&r2=360457&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu Fri May 10 11:56:05 2019
@@ -46,10 +46,8 @@ EXTERN void __kmpc_barrier(kmp_Ident *lo
     __kmpc_barrier_simple_spmd(loc_ref, tid);
   } else {
     tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc_ref));
-    omptarget_nvptx_TaskDescr *currTaskDescr =
-        omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(tid);
     int numberOfActiveOMPThreads =
-        GetNumberOfOmpThreads(tid, checkSPMDMode(loc_ref));
+        GetNumberOfOmpThreads(checkSPMDMode(loc_ref));
     if (numberOfActiveOMPThreads > 1) {
       if (checkSPMDMode(loc_ref)) {
         __kmpc_barrier_simple_spmd(loc_ref, tid);




More information about the Openmp-commits mailing list