[Openmp-commits] [openmp] 1e12156 - [OpenMP][NFCI] Pipe the IdentTy object through more new RT functions
Shilei Tian via Openmp-commits
openmp-commits at lists.llvm.org
Thu Jan 27 12:36:59 PST 2022
Author: Johannes Doerfert
Date: 2022-01-27T15:36:55-05:00
New Revision: 1e1215689656459ad41d4501d9204dcea4004a71
URL: https://github.com/llvm/llvm-project/commit/1e1215689656459ad41d4501d9204dcea4004a71
DIFF: https://github.com/llvm/llvm-project/commit/1e1215689656459ad41d4501d9204dcea4004a71.diff
LOG: [OpenMP][NFCI] Pipe the IdentTy object through more new RT functions
IdentTy objects are useful for debugging and profiling so we want to
keep them around in more places, especially those that have a large
impact on performance, e.g., everything related to state.
Reviewed By: tianshilei1992
Differential Revision: https://reviews.llvm.org/D112494
Added:
Modified:
openmp/libomptarget/DeviceRTL/include/State.h
openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
openmp/libomptarget/DeviceRTL/src/State.cpp
openmp/libomptarget/DeviceRTL/src/Tasking.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h
index c860bd1b98b8..3365b054b472 100644
--- a/openmp/libomptarget/DeviceRTL/include/State.h
+++ b/openmp/libomptarget/DeviceRTL/include/State.h
@@ -40,56 +40,61 @@ enum ValueKind {
};
/// TODO
-void enterDataEnvironment();
+void enterDataEnvironment(IdentTy *Ident);
/// TODO
void exitDataEnvironment();
/// TODO
struct DateEnvironmentRAII {
- DateEnvironmentRAII() { enterDataEnvironment(); }
+ DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); }
~DateEnvironmentRAII() { exitDataEnvironment(); }
};
/// TODO
void resetStateForThread(uint32_t TId);
-uint32_t &lookup32(ValueKind VK, bool IsReadonly);
+uint32_t &lookup32(ValueKind VK, bool IsReadonly, IdentTy *Ident);
void *&lookupPtr(ValueKind VK, bool IsReadonly);
/// A class without actual state used to provide a nice interface to lookup and
/// update ICV values we can declare in global scope.
template <typename Ty, ValueKind Kind> struct Value {
__attribute__((flatten, always_inline)) operator Ty() {
- return lookup(/* IsReadonly */ true);
+ return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr);
}
__attribute__((flatten, always_inline)) Value &operator=(const Ty &Other) {
- set(Other);
+ set(Other, /* IdentTy */ nullptr);
return *this;
}
__attribute__((flatten, always_inline)) Value &operator++() {
- inc(1);
+ inc(1, /* IdentTy */ nullptr);
return *this;
}
__attribute__((flatten, always_inline)) Value &operator--() {
- inc(-1);
+ inc(-1, /* IdentTy */ nullptr);
return *this;
}
private:
- Ty &lookup(bool IsReadonly) {
- Ty &t = lookup32(Kind, IsReadonly);
+ __attribute__((flatten, always_inline)) Ty &lookup(bool IsReadonly,
+ IdentTy *Ident) {
+ Ty &t = lookup32(Kind, IsReadonly, Ident);
return t;
}
- Ty &inc(int UpdateVal) {
- return (lookup(/* IsReadonly */ false) += UpdateVal);
+ __attribute__((flatten, always_inline)) Ty &inc(int UpdateVal,
+ IdentTy *Ident) {
+ return (lookup(/* IsReadonly */ false, Ident) += UpdateVal);
}
- Ty &set(Ty UpdateVal) { return (lookup(/* IsReadonly */ false) = UpdateVal); }
+ __attribute__((flatten, always_inline)) Ty &set(Ty UpdateVal,
+ IdentTy *Ident) {
+ return (lookup(/* IsReadonly */ false, Ident) = UpdateVal);
+ }
template <typename VTy, typename Ty2> friend struct ValueRAII;
};
@@ -99,7 +104,7 @@ template <typename Ty, ValueKind Kind> struct Value {
/// we can declare in global scope.
template <typename Ty, ValueKind Kind> struct PtrValue {
__attribute__((flatten, always_inline)) operator Ty() {
- return lookup(/* IsReadonly */ true);
+ return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr);
}
__attribute__((flatten, always_inline)) PtrValue &operator=(const Ty Other) {
@@ -108,17 +113,19 @@ template <typename Ty, ValueKind Kind> struct PtrValue {
}
private:
- Ty &lookup(bool IsReadonly) { return lookupPtr(Kind, IsReadonly); }
+ Ty &lookup(bool IsReadonly, IdentTy *) { return lookupPtr(Kind, IsReadonly); }
- Ty &set(Ty UpdateVal) { return (lookup(/* IsReadonly */ false) = UpdateVal); }
+ Ty &set(Ty UpdateVal) {
+ return (lookup(/* IsReadonly */ false, /* IdentTy */ nullptr) = UpdateVal);
+ }
template <typename VTy, typename Ty2> friend struct ValueRAII;
};
template <typename VTy, typename Ty> struct ValueRAII {
- ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active)
- : Ptr(Active ? V.lookup(/* IsReadonly */ false) : Val), Val(OldValue),
- Active(Active) {
+ ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident)
+ : Ptr(Active ? V.lookup(/* IsReadonly */ false, Ident) : Val),
+ Val(OldValue), Active(Active) {
if (!Active)
return;
ASSERT(Ptr == OldValue && "ValueRAII initialization with wrong old value!");
diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
index 610512a5f799..4ce24937d26e 100644
--- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
@@ -87,7 +87,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
uint32_t TId = mapping::getThreadIdInBlock();
// Handle the serialized case first, same for SPMD/non-SPMD.
if (OMP_UNLIKELY(!if_expr || icv::Level)) {
- state::enterDataEnvironment();
+ state::DateEnvironmentRAII DERAII(ident);
++icv::Level;
invokeMicrotask(TId, 0, fn, args, nargs);
state::exitDataEnvironment();
@@ -104,9 +104,10 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
// last or the other updates will cause a thread specific state to be
// created.
state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, NumThreads,
- 1u, TId == 0);
- state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0);
- state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0);
+ 1u, TId == 0, ident);
+ state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0,
+ ident);
+ state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0, ident);
// Synchronize all threads after the main thread (TId == 0) set up the
// team state properly.
@@ -142,7 +143,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
bool IsActiveParallelRegion = NumThreads > 1;
if (!IsActiveParallelRegion) {
- state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true);
+ state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident);
invokeMicrotask(TId, 0, fn, args, nargs);
return;
}
@@ -160,11 +161,11 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
// last or the other updates will cause a thread specific state to be
// created.
state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, NumThreads,
- 1u, true);
+ 1u, true, ident);
state::ValueRAII ParallelRegionFnRAII(state::ParallelRegionFn, wrapper_fn,
- (void *)nullptr, true);
- state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true);
- state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true);
+ (void *)nullptr, true, ident);
+ state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true, ident);
+ state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident);
// Master signals work to activate workers.
synchronize::threads();
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index ee6295fdcadc..754c7067faac 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -281,7 +281,7 @@ __attribute__((loader_uninitialized))
ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
-uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var) {
+uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) {
if (OMP_LIKELY(TeamState.ICVState.LevelVar == 0))
return TeamState.ICVState.*Var;
uint32_t TId = mapping::getThreadIdInBlock();
@@ -322,32 +322,32 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
} // namespace
-uint32_t &state::lookup32(ValueKind Kind, bool IsReadonly) {
+uint32_t &state::lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident) {
switch (Kind) {
case state::VK_NThreads:
if (IsReadonly)
return lookup32Impl(&ICVStateTy::NThreadsVar);
- return lookupForModify32Impl(&ICVStateTy::NThreadsVar);
+ return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident);
case state::VK_Level:
if (IsReadonly)
return lookup32Impl(&ICVStateTy::LevelVar);
- return lookupForModify32Impl(&ICVStateTy::LevelVar);
+ return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident);
case state::VK_ActiveLevel:
if (IsReadonly)
return lookup32Impl(&ICVStateTy::ActiveLevelVar);
- return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar);
+ return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident);
case state::VK_MaxActiveLevels:
if (IsReadonly)
return lookup32Impl(&ICVStateTy::MaxActiveLevelsVar);
- return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar);
+ return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident);
case state::VK_RunSched:
if (IsReadonly)
return lookup32Impl(&ICVStateTy::RunSchedVar);
- return lookupForModify32Impl(&ICVStateTy::RunSchedVar);
+ return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident);
case state::VK_RunSchedChunk:
if (IsReadonly)
return lookup32Impl(&ICVStateTy::RunSchedChunkVar);
- return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar);
+ return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident);
case state::VK_ParallelTeamSize:
return TeamState.ParallelTeamSize;
default:
@@ -376,7 +376,7 @@ void state::init(bool IsSPMD) {
ThreadStates[mapping::getThreadIdInBlock()] = nullptr;
}
-void state::enterDataEnvironment() {
+void state::enterDataEnvironment(IdentTy *Ident) {
unsigned TId = mapping::getThreadIdInBlock();
ThreadStateTy *NewThreadState =
static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
diff --git a/openmp/libomptarget/DeviceRTL/src/Tasking.cpp b/openmp/libomptarget/DeviceRTL/src/Tasking.cpp
index 0416395b1054..2c80e71a2fb4 100644
--- a/openmp/libomptarget/DeviceRTL/src/Tasking.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Tasking.cpp
@@ -49,7 +49,7 @@ int32_t __kmpc_omp_task_with_deps(IdentTy *Loc, uint32_t TId,
TaskDescriptorTy *TaskDescriptor, int32_t,
void *, int32_t, void *) {
FunctionTracingRAII();
- state::DateEnvironmentRAII DERAII;
+ state::DateEnvironmentRAII DERAII(Loc);
TaskDescriptor->TaskFn(0, TaskDescriptor);
@@ -60,7 +60,7 @@ int32_t __kmpc_omp_task_with_deps(IdentTy *Loc, uint32_t TId,
void __kmpc_omp_task_begin_if0(IdentTy *Loc, uint32_t TId,
TaskDescriptorTy *TaskDescriptor) {
FunctionTracingRAII();
- state::enterDataEnvironment();
+ state::enterDataEnvironment(Loc);
}
void __kmpc_omp_task_complete_if0(IdentTy *Loc, uint32_t TId,
More information about the Openmp-commits mailing list