[Openmp-commits] [PATCH] D135037: [OpenMP] Eliminate the ThreadStates array in favor of indirection
Johannes Doerfert via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Sun Oct 2 09:50:08 PDT 2022
jdoerfert created this revision.
jdoerfert added reviewers: jhuber6, tianshilei1992.
Herald added subscribers: guansong, bollu, yaxunl.
Herald added a project: All.
jdoerfert requested review of this revision.
Herald added a subscriber: sstefan1.
Herald added a project: OpenMP.
If we have thread states, the program is going to be rather slow. If we
don't, we want to avoid wasting shared memory. This patch introduces a
slight penalty (malloc + indirection) for the slow path and reduces
resource usage for the fast path.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D135037
Files:
openmp/libomptarget/DeviceRTL/include/State.h
openmp/libomptarget/DeviceRTL/include/Types.h
openmp/libomptarget/DeviceRTL/src/State.cpp
Index: openmp/libomptarget/DeviceRTL/src/State.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/State.cpp
+++ openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -12,6 +12,7 @@
#include "Configuration.h"
#include "Debug.h"
#include "Interface.h"
+#include "Mapping.h"
#include "Synchronization.h"
#include "Types.h"
@@ -220,10 +221,7 @@
}
state::TeamStateTy SHARED(_OMP::state::TeamState);
-
-__attribute__((loader_uninitialized))
-state::ThreadStateTy *_OMP::state::ThreadStates[mapping::MaxThreadsPerTeam];
-#pragma omp allocate(_OMP::state::ThreadStates) allocator(omp_pteam_mem_alloc)
+state::ThreadStateTy **SHARED(_OMP::state::ThreadStates);
namespace {
@@ -247,9 +245,8 @@
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
TeamState.init(IsSPMD);
DebugEntryRAII::init();
+ ThreadStates = nullptr;
}
-
- ThreadStates[mapping::getThreadIdInBlock()] = nullptr;
}
void state::enterDataEnvironment(IdentTy *Ident) {
@@ -259,6 +256,17 @@
unsigned TId = mapping::getThreadIdInBlock();
ThreadStateTy *NewThreadState =
static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
+ uintptr_t *ThreadStatesBitsPtr = reinterpret_cast<uintptr_t *>(ThreadStates);
+ if (!atomic::load(ThreadStatesBitsPtr, atomic::seq_cst)) {
+ void *ThreadStatesPtr =
+ memory::allocGlobal(sizeof(ThreadStates[0]) * mapping::getBlockSize(),
+ "Thread state array allocation");
+ if (!atomic::cas(ThreadStatesBitsPtr, uintptr_t(0),
+ reinterpret_cast<uintptr_t>(ThreadStatesPtr),
+ atomic::seq_cst, atomic::seq_cst))
+ memory::freeGlobal(ThreadStatesPtr,
+ "Thread state array allocated multiple times");
+ }
NewThreadState->init(ThreadStates[TId]);
TeamState.HasThreadState = true;
ThreadStates[TId] = NewThreadState;
@@ -294,7 +302,6 @@
TeamStateTy InitialTeamState;
InitialTeamState.init(IsSPMD);
InitialTeamState.assertEqual(TeamState);
- ASSERT(!ThreadStates[mapping::getThreadIdInBlock()]);
ASSERT(mapping::isSPMDMode() == IsSPMD);
}
Index: openmp/libomptarget/DeviceRTL/include/Types.h
===================================================================
--- openmp/libomptarget/DeviceRTL/include/Types.h
+++ openmp/libomptarget/DeviceRTL/include/Types.h
@@ -33,6 +33,9 @@
using int64_t = long;
using uint64_t = unsigned long;
using size_t = decltype(sizeof(char));
+// TODO: Properly implement this
+using intptr_t = int64_t;
+using uintptr_t = uint64_t;
static_assert(sizeof(int8_t) == 1, "type size mismatch");
static_assert(sizeof(uint8_t) == 1, "type size mismatch");
Index: openmp/libomptarget/DeviceRTL/include/State.h
===================================================================
--- openmp/libomptarget/DeviceRTL/include/State.h
+++ openmp/libomptarget/DeviceRTL/include/State.h
@@ -109,7 +109,7 @@
}
};
-extern ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
+extern ThreadStateTy **ThreadStates;
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
/// Initialize the state machinery. Must be called by all threads.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D135037.464561.patch
Type: text/x-patch
Size: 3211 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20221002/c68ab71a/attachment-0001.bin>
More information about the Openmp-commits
mailing list