[Openmp-commits] [openmp] f8ee045 - [OpenMP] Eliminate the ThreadStates array in favor of indirection
Johannes Doerfert via Openmp-commits
openmp-commits at lists.llvm.org
Tue Oct 4 20:29:18 PDT 2022
Author: Johannes Doerfert
Date: 2022-10-04T20:27:34-07:00
New Revision: f8ee045c6d934f64bcdab6c9fb4fc5c36c33938f
URL: https://github.com/llvm/llvm-project/commit/f8ee045c6d934f64bcdab6c9fb4fc5c36c33938f
DIFF: https://github.com/llvm/llvm-project/commit/f8ee045c6d934f64bcdab6c9fb4fc5c36c33938f.diff
LOG: [OpenMP] Eliminate the ThreadStates array in favor of indirection
If a kernel uses thread states, the program is going to be rather slow
anyway. If it doesn't, we want to avoid wasting shared memory on a statically
sized per-team array. This patch introduces a slight penalty (malloc +
indirection) for the slow path and reduces resource usage for the fast path.
Differential Revision: https://reviews.llvm.org/D135037
Added:
Modified:
openmp/libomptarget/DeviceRTL/include/State.h
openmp/libomptarget/DeviceRTL/include/Types.h
openmp/libomptarget/DeviceRTL/src/State.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h
index c58017b5d3cf..b50ad4ea9cf8 100644
--- a/openmp/libomptarget/DeviceRTL/include/State.h
+++ b/openmp/libomptarget/DeviceRTL/include/State.h
@@ -109,7 +109,7 @@ struct ThreadStateTy {
}
};
-extern ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
+extern ThreadStateTy **ThreadStates;
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
/// Initialize the state machinery. Must be called by all threads.
diff --git a/openmp/libomptarget/DeviceRTL/include/Types.h b/openmp/libomptarget/DeviceRTL/include/Types.h
index 8a0107d028cf..d5209f1d859e 100644
--- a/openmp/libomptarget/DeviceRTL/include/Types.h
+++ b/openmp/libomptarget/DeviceRTL/include/Types.h
@@ -33,6 +33,9 @@ using uint32_t = unsigned int;
using int64_t = long;
using uint64_t = unsigned long;
using size_t = decltype(sizeof(char));
+// TODO: Properly implement this
+using intptr_t = int64_t;
+using uintptr_t = uint64_t;
static_assert(sizeof(int8_t) == 1, "type size mismatch");
static_assert(sizeof(uint8_t) == 1, "type size mismatch");
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index 59e6f488c58b..fc0c734f53aa 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -12,6 +12,7 @@
#include "Configuration.h"
#include "Debug.h"
#include "Interface.h"
+#include "Mapping.h"
#include "Synchronization.h"
#include "Types.h"
#include "Utils.h"
@@ -221,10 +222,7 @@ void state::TeamStateTy::assertEqual(TeamStateTy &Other) const {
}
state::TeamStateTy SHARED(_OMP::state::TeamState);
-
-__attribute__((loader_uninitialized))
-state::ThreadStateTy *_OMP::state::ThreadStates[mapping::MaxThreadsPerTeam];
-#pragma omp allocate(_OMP::state::ThreadStates) allocator(omp_pteam_mem_alloc)
+state::ThreadStateTy **SHARED(_OMP::state::ThreadStates);
namespace {
@@ -248,18 +246,32 @@ void state::init(bool IsSPMD) {
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
TeamState.init(IsSPMD);
DebugEntryRAII::init();
+ ThreadStates = nullptr;
}
-
- ThreadStates[mapping::getThreadIdInBlock()] = nullptr;
}
void state::enterDataEnvironment(IdentTy *Ident) {
ASSERT(config::mayUseThreadStates() &&
"Thread state modified while explicitly disabled!");
+ if (!config::mayUseThreadStates())
+ return;
unsigned TId = mapping::getThreadIdInBlock();
ThreadStateTy *NewThreadState =
static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
+ uintptr_t *ThreadStatesBitsPtr = reinterpret_cast<uintptr_t *>(&ThreadStates);
+ if (!atomic::load(ThreadStatesBitsPtr, atomic::seq_cst)) {
+ uint32_t Bytes = sizeof(ThreadStates[0]) * mapping::getBlockSize();
+ void *ThreadStatesPtr =
+ memory::allocShared(Bytes, "Thread state array allocation");
+ if (!atomic::cas(ThreadStatesBitsPtr, uintptr_t(0),
+ reinterpret_cast<uintptr_t>(ThreadStatesPtr),
+ atomic::seq_cst, atomic::seq_cst))
+ memory::freeShared(ThreadStatesPtr, Bytes,
+ "Thread state array allocated multiple times");
+ ASSERT(atomic::load(ThreadStatesBitsPtr, atomic::seq_cst) &&
+ "Expected valid thread states bit!");
+ }
NewThreadState->init(ThreadStates[TId]);
TeamState.HasThreadState = true;
ThreadStates[TId] = NewThreadState;
@@ -274,6 +286,8 @@ void state::exitDataEnvironment() {
}
void state::resetStateForThread(uint32_t TId) {
+ if (!config::mayUseThreadStates())
+ return;
if (OMP_LIKELY(!TeamState.HasThreadState || !ThreadStates[TId]))
return;
@@ -295,7 +309,6 @@ void state::assumeInitialState(bool IsSPMD) {
TeamStateTy InitialTeamState;
InitialTeamState.init(IsSPMD);
InitialTeamState.assertEqual(TeamState);
- ASSERT(!ThreadStates[mapping::getThreadIdInBlock()]);
ASSERT(mapping::isSPMDMode() == IsSPMD);
}
More information about the Openmp-commits
mailing list