[Openmp-commits] [openmp] r350524 - [OPENMP][NVPTX]Fix dynamic scheduling.
Alexey Bataev via Openmp-commits
openmp-commits at lists.llvm.org
Mon Jan 7 06:25:25 PST 2019
Author: abataev
Date: Mon Jan 7 06:25:25 2019
New Revision: 350524
URL: http://llvm.org/viewvc/llvm-project?rev=350524&view=rev
Log:
[OPENMP][NVPTX]Fix dynamic scheduling.
Summary:
The previous implementation may cause a runtime crash when the number of
teams is > 1024. This patch fixes that problem and reduces the number of
atomic operations by a factor of 32.
Reviewers: grokos, gtbercea, kkwli0
Subscribers: guansong, jfb, openmp-commits, caomhin
Differential Revision: https://reviews.llvm.org/D56332
Modified:
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu?rev=350524&r1=350523&r2=350524&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu Mon Jan 7 06:25:25 2019
@@ -352,18 +352,18 @@ public:
omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
(unsigned long long)omptarget_nvptx_threadPrivateContext->Stride(
tid));
-
} else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) {
- __kmpc_barrier(loc, threadId);
- // save sched state
- int teamId = GetOmpTeamId();
+ // save data
omptarget_nvptx_threadPrivateContext->ScheduleType(tid) = schedule;
- if (GetThreadIdInBlock() == 0) {
- if (chunk < 1)
- chunk = 1;
- omptarget_nvptx_threadPrivateContext->Chunk(teamId) = chunk;
- omptarget_nvptx_threadPrivateContext->LoopUpperBound(teamId) = ub;
- omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId) = lb;
+ if (chunk < 1)
+ chunk = 1;
+ omptarget_nvptx_threadPrivateContext->Chunk(tid) = chunk;
+ omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid) = ub;
+ omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = lb;
+ __kmpc_barrier(loc, threadId);
+ if (tid == 0) {
+ omptarget_nvptx_threadPrivateContext->Cnt() = 0;
+ __threadfence_block();
}
__kmpc_barrier(loc, threadId);
PRINT(LD_LOOP,
@@ -371,21 +371,45 @@ public:
", chunk %" PRIu64 "\n",
(int)tnum,
(unsigned long long)
- omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId),
- omptarget_nvptx_threadPrivateContext->LoopUpperBound(teamId),
- omptarget_nvptx_threadPrivateContext->Chunk(teamId));
+ omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
+ omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
+ omptarget_nvptx_threadPrivateContext->Chunk(tid));
}
}
////////////////////////////////////////////////////////////////////////////////
// Support for dispatch next
+ INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) {
+ int lo, hi;
+ asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
+ hi = __SHFL_SYNC(active, hi, leader);
+ lo = __SHFL_SYNC(active, lo, leader);
+ asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
+ return val;
+ }
+
+ INLINE static uint64_t NextIter() {
+ unsigned int active = __ACTIVEMASK();
+ int leader = __ffs(active) - 1;
+ int change = __popc(active);
+ unsigned lane_mask_lt;
+ asm("mov.u32 %0, %%lanemask_lt;" : "=r"(lane_mask_lt));
+ unsigned int rank = __popc(active & lane_mask_lt);
+ uint64_t warp_res;
+ if (rank == 0) {
+ warp_res = atomicAdd(
+ (unsigned long long *)&omptarget_nvptx_threadPrivateContext->Cnt(),
+ change);
+ }
+ warp_res = Shuffle(active, warp_res, leader);
+ return warp_res + rank;
+ }
+
INLINE static int DynamicNextChunk(T &lb, T &ub, T chunkSize,
- int64_t &loopLowerBound,
- T loopUpperBound) {
- // calculate lower bound for all lanes in the warp
- lb = atomicAdd((unsigned long long *)&loopLowerBound,
- (unsigned long long)chunkSize);
+ T loopLowerBound, T loopUpperBound) {
+ T N = NextIter();
+ lb = loopLowerBound + N * chunkSize;
ub = lb + chunkSize - 1; // Clang uses i <= ub
// 3 result cases:
@@ -461,11 +485,10 @@ public:
schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
"bad sched");
T myLb, myUb;
- int teamId = GetOmpTeamId();
int finished = DynamicNextChunk(
- myLb, myUb, omptarget_nvptx_threadPrivateContext->Chunk(teamId),
- omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId),
- omptarget_nvptx_threadPrivateContext->LoopUpperBound(teamId));
+ myLb, myUb, omptarget_nvptx_threadPrivateContext->Chunk(tid),
+ omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
+ omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid));
if (finished == FINISHED)
return DISPATCH_FINISHED;
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h?rev=350524&r1=350523&r2=350524&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h Mon Jan 7 06:25:25 2019
@@ -344,6 +344,7 @@ public:
INLINE omptarget_nvptx_TeamDescr &TeamContext() { return teamContext; }
INLINE void InitThreadPrivateContext(int tid);
+ INLINE uint64_t &Cnt() { return cnt; }
private:
// team context for this team
@@ -366,6 +367,7 @@ private:
// state for dispatch with dyn/guided OR static (never use both at a time)
int64_t nextLowerBound[MAX_THREADS_PER_TEAM];
int64_t stride[MAX_THREADS_PER_TEAM];
+ uint64_t cnt;
};
/// Device environment data
More information about the Openmp-commits
mailing list