[Openmp-commits] [openmp] r333285 - [libomptarget-nvptx] loop: Determine if runtime uninitialized
Jonas Hahnfeld via Openmp-commits
openmp-commits at lists.llvm.org
Fri May 25 08:56:48 PDT 2018
Author: hahnfeld
Date: Fri May 25 08:56:48 2018
New Revision: 333285
URL: http://llvm.org/viewvc/llvm-project?rev=333285&view=rev
Log:
[libomptarget-nvptx] loop: Determine if runtime uninitialized
The generic entry points for static loop scheduling previously
hardcoded the assumption that the runtime was initialized. This can be
wrong if the compiler determines that the runtime is not needed and
calls the init functions accordingly.
This didn't affect clang-ykt because it has separate entry points for
the different combinations of SPMD mode and "runtime not needed". I
haven't taken measurements yet, but with inlining we might get away with
always calling the generic interface and letting the compiler and the
runtime figure out the rest.
In any case, a correct runtime is always better than having
functions that may only be called if previous calls passed in
a specific set of arguments!
Differential Revision: https://reviews.llvm.org/D47131
Modified:
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu?rev=333285&r1=333284&r2=333285&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu Fri May 25 08:56:48 2018
@@ -96,8 +96,8 @@ public:
INLINE static void for_static_init(int32_t schedtype, int32_t *plastiter,
T *plower, T *pupper, ST *pstride,
ST chunk, bool IsSPMDExecutionMode,
- bool IsOMPRuntimeUnavailable = false) {
- // When IsOMPRuntimeUnavailable is true, we assume that the caller is
+ bool IsRuntimeUninitialized) {
+ // When IsRuntimeUninitialized is true, we assume that the caller is
// in an L0 parallel region and that all worker threads participate.
int tid = GetLogicalThreadIdInBlock();
@@ -105,23 +105,23 @@ public:
// Assume we are in teams region or that we use a single block
// per target region
ST numberOfActiveOMPThreads = GetNumberOfOmpThreads(
- tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable);
+ tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
// All warps that are in excess of the maximum requested, do
// not execute the loop
PRINT(LD_LOOP,
"OMP Thread %d: schedule type %d, chunk size = %lld, mytid "
"%d, num tids %d\n",
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable),
+ GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized),
schedtype, P64(chunk),
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable),
+ GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized),
GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsOMPRuntimeUnavailable));
+ IsRuntimeUninitialized));
ASSERT0(
LT_FUSSY,
- (GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable)) <
+ (GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized)) <
(GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsOMPRuntimeUnavailable)),
+ IsRuntimeUninitialized)),
"current thread is not needed here; error");
// copy
@@ -135,9 +135,9 @@ public:
case kmp_sched_static_chunk: {
if (chunk > 0) {
entityId =
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable);
+ GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsOMPRuntimeUnavailable);
+ IsRuntimeUninitialized);
ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
numberOfEntities);
break;
@@ -145,9 +145,9 @@ public:
} // note: if chunk <=0, use nochunk
case kmp_sched_static_nochunk: {
entityId =
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable);
+ GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsOMPRuntimeUnavailable);
+ IsRuntimeUninitialized);
ForStaticNoChunk(lastiter, lb, ub, stride, chunk, entityId,
numberOfEntities);
break;
@@ -172,12 +172,12 @@ public:
case kmp_sched_distr_static_chunk_sched_static_chunkone: {
entityId =
GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsOMPRuntimeUnavailable) *
+ IsRuntimeUninitialized) *
GetOmpTeamId() +
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable);
+ GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
numberOfEntities = GetNumberOfOmpTeams() *
GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsOMPRuntimeUnavailable);
+ IsRuntimeUninitialized);
ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
numberOfEntities);
break;
@@ -187,9 +187,9 @@ public:
PRINT(LD_LOOP, "unknown schedtype %d, revert back to static chunk\n",
schedtype);
entityId =
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable);
+ GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsOMPRuntimeUnavailable);
+ IsRuntimeUninitialized);
ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
numberOfEntities);
}
@@ -202,7 +202,7 @@ public:
PRINT(LD_LOOP,
"Got sched: Active %d, total %d: lb %lld, ub %lld, stride %lld\n",
GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsOMPRuntimeUnavailable),
+ IsRuntimeUninitialized),
GetNumberOfWorkersInTeam(), P64(*plower), P64(*pupper),
P64(*pstride));
}
@@ -574,7 +574,8 @@ EXTERN void __kmpc_for_static_init_4(kmp
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode());
+ schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
+ isRuntimeUninitialized());
}
EXTERN void __kmpc_for_static_init_4u(kmp_Indent *loc, int32_t global_tid,
@@ -584,7 +585,8 @@ EXTERN void __kmpc_for_static_init_4u(km
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4u\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode());
+ schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
+ isRuntimeUninitialized());
}
EXTERN void __kmpc_for_static_init_8(kmp_Indent *loc, int32_t global_tid,
@@ -594,7 +596,8 @@ EXTERN void __kmpc_for_static_init_8(kmp
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode());
+ schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
+ isRuntimeUninitialized());
}
EXTERN void __kmpc_for_static_init_8u(kmp_Indent *loc, int32_t global_tid,
@@ -604,7 +607,8 @@ EXTERN void __kmpc_for_static_init_8u(km
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8u\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode());
+ schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
+ isRuntimeUninitialized());
}
EXTERN
@@ -616,8 +620,8 @@ void __kmpc_for_static_init_4_simple_spm
PRINT0(LD_IO, "call kmpc_for_static_init_4_simple_spmd\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
- /*isSPMDExecutionMode=*/true,
- /*IsOMPRuntimeUnavailable=*/true);
+ /*IsSPMDExecutionMode=*/true,
+ /*IsRuntimeUninitialized=*/true);
}
EXTERN
@@ -629,8 +633,8 @@ void __kmpc_for_static_init_4u_simple_sp
PRINT0(LD_IO, "call kmpc_for_static_init_4u_simple_spmd\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
- /*isSPMDExecutionMode=*/true,
- /*IsOMPRuntimeUnavailable=*/true);
+ /*IsSPMDExecutionMode=*/true,
+ /*IsRuntimeUninitialized=*/true);
}
EXTERN
@@ -642,8 +646,8 @@ void __kmpc_for_static_init_8_simple_spm
PRINT0(LD_IO, "call kmpc_for_static_init_8_simple_spmd\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
- /*isSPMDExecutionMode=*/true,
- /*IsOMPRuntimeUnavailable=*/true);
+ /*IsSPMDExecutionMode=*/true,
+ /*IsRuntimeUninitialized=*/true);
}
EXTERN
@@ -655,8 +659,8 @@ void __kmpc_for_static_init_8u_simple_sp
PRINT0(LD_IO, "call kmpc_for_static_init_8u_simple_spmd\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
- /*isSPMDExecutionMode=*/true,
- /*IsOMPRuntimeUnavailable=*/true);
+ /*IsSPMDExecutionMode=*/true,
+ /*IsRuntimeUninitialized=*/true);
}
EXTERN
@@ -667,8 +671,8 @@ void __kmpc_for_static_init_4_simple_gen
PRINT0(LD_IO, "call kmpc_for_static_init_4_simple_generic\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
- /*isSPMDExecutionMode=*/false,
- /*IsOMPRuntimeUnavailable=*/true);
+ /*IsSPMDExecutionMode=*/false,
+ /*IsRuntimeUninitialized=*/true);
}
EXTERN
@@ -679,8 +683,8 @@ void __kmpc_for_static_init_4u_simple_ge
PRINT0(LD_IO, "call kmpc_for_static_init_4u_simple_generic\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
- /*isSPMDExecutionMode=*/false,
- /*IsOMPRuntimeUnavailable=*/true);
+ /*IsSPMDExecutionMode=*/false,
+ /*IsRuntimeUninitialized=*/true);
}
EXTERN
@@ -691,8 +695,8 @@ void __kmpc_for_static_init_8_simple_gen
PRINT0(LD_IO, "call kmpc_for_static_init_8_simple_generic\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
- /*isSPMDExecutionMode=*/false,
- /*IsOMPRuntimeUnavailable=*/true);
+ /*IsSPMDExecutionMode=*/false,
+ /*IsRuntimeUninitialized=*/true);
}
EXTERN
@@ -703,8 +707,8 @@ void __kmpc_for_static_init_8u_simple_ge
PRINT0(LD_IO, "call kmpc_for_static_init_8u_simple_generic\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
- /*isSPMDExecutionMode=*/false,
- /*IsOMPRuntimeUnavailable=*/true);
+ /*IsSPMDExecutionMode=*/false,
+ /*IsRuntimeUninitialized=*/true);
}
EXTERN void __kmpc_for_static_fini(kmp_Indent *loc, int32_t global_tid) {
More information about the Openmp-commits
mailing list