[Openmp-commits] [openmp] f8ee045 - [OpenMP] Eliminate the ThreadStates array in favor of indirection

Johannes Doerfert via Openmp-commits openmp-commits at lists.llvm.org
Tue Oct 4 20:29:18 PDT 2022


Author: Johannes Doerfert
Date: 2022-10-04T20:27:34-07:00
New Revision: f8ee045c6d934f64bcdab6c9fb4fc5c36c33938f

URL: https://github.com/llvm/llvm-project/commit/f8ee045c6d934f64bcdab6c9fb4fc5c36c33938f
DIFF: https://github.com/llvm/llvm-project/commit/f8ee045c6d934f64bcdab6c9fb4fc5c36c33938f.diff

LOG: [OpenMP] Eliminate the ThreadStates array in favor of indirection

If we have thread states, the program is going to be rather slow. If we
don't, we want to avoid wasting shared memory. This patch introduces a
slight penalty (malloc + indirection) for the slow path and reduces
resource usage for the fast path.

Differential Revision: https://reviews.llvm.org/D135037

Added: 
    

Modified: 
    openmp/libomptarget/DeviceRTL/include/State.h
    openmp/libomptarget/DeviceRTL/include/Types.h
    openmp/libomptarget/DeviceRTL/src/State.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h
index c58017b5d3cf..b50ad4ea9cf8 100644
--- a/openmp/libomptarget/DeviceRTL/include/State.h
+++ b/openmp/libomptarget/DeviceRTL/include/State.h
@@ -109,7 +109,7 @@ struct ThreadStateTy {
   }
 };
 
-extern ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
+extern ThreadStateTy **ThreadStates;
 #pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
 
 /// Initialize the state machinery. Must be called by all threads.

diff  --git a/openmp/libomptarget/DeviceRTL/include/Types.h b/openmp/libomptarget/DeviceRTL/include/Types.h
index 8a0107d028cf..d5209f1d859e 100644
--- a/openmp/libomptarget/DeviceRTL/include/Types.h
+++ b/openmp/libomptarget/DeviceRTL/include/Types.h
@@ -33,6 +33,9 @@ using uint32_t = unsigned int;
 using int64_t = long;
 using uint64_t = unsigned long;
 using size_t = decltype(sizeof(char));
+// TODO: Properly implement this
+using intptr_t = int64_t;
+using uintptr_t = uint64_t;
 
 static_assert(sizeof(int8_t) == 1, "type size mismatch");
 static_assert(sizeof(uint8_t) == 1, "type size mismatch");

diff  --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index 59e6f488c58b..fc0c734f53aa 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -12,6 +12,7 @@
 #include "Configuration.h"
 #include "Debug.h"
 #include "Interface.h"
+#include "Mapping.h"
 #include "Synchronization.h"
 #include "Types.h"
 #include "Utils.h"
@@ -221,10 +222,7 @@ void state::TeamStateTy::assertEqual(TeamStateTy &Other) const {
 }
 
 state::TeamStateTy SHARED(_OMP::state::TeamState);
-
-__attribute__((loader_uninitialized))
-state::ThreadStateTy *_OMP::state::ThreadStates[mapping::MaxThreadsPerTeam];
-#pragma omp allocate(_OMP::state::ThreadStates) allocator(omp_pteam_mem_alloc)
+state::ThreadStateTy **SHARED(_OMP::state::ThreadStates);
 
 namespace {
 
@@ -248,18 +246,32 @@ void state::init(bool IsSPMD) {
   if (mapping::isInitialThreadInLevel0(IsSPMD)) {
     TeamState.init(IsSPMD);
     DebugEntryRAII::init();
+    ThreadStates = nullptr;
   }
-
-  ThreadStates[mapping::getThreadIdInBlock()] = nullptr;
 }
 
 void state::enterDataEnvironment(IdentTy *Ident) {
   ASSERT(config::mayUseThreadStates() &&
          "Thread state modified while explicitly disabled!");
+  if (!config::mayUseThreadStates())
+    return;
 
   unsigned TId = mapping::getThreadIdInBlock();
   ThreadStateTy *NewThreadState =
       static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
+  uintptr_t *ThreadStatesBitsPtr = reinterpret_cast<uintptr_t *>(&ThreadStates);
+  if (!atomic::load(ThreadStatesBitsPtr, atomic::seq_cst)) {
+    uint32_t Bytes = sizeof(ThreadStates[0]) * mapping::getBlockSize();
+    void *ThreadStatesPtr =
+        memory::allocShared(Bytes, "Thread state array allocation");
+    if (!atomic::cas(ThreadStatesBitsPtr, uintptr_t(0),
+                     reinterpret_cast<uintptr_t>(ThreadStatesPtr),
+                     atomic::seq_cst, atomic::seq_cst))
+      memory::freeShared(ThreadStatesPtr, Bytes,
+                         "Thread state array allocated multiple times");
+    ASSERT(atomic::load(ThreadStatesBitsPtr, atomic::seq_cst) &&
+           "Expected valid thread states bit!");
+  }
   NewThreadState->init(ThreadStates[TId]);
   TeamState.HasThreadState = true;
   ThreadStates[TId] = NewThreadState;
@@ -274,6 +286,8 @@ void state::exitDataEnvironment() {
 }
 
 void state::resetStateForThread(uint32_t TId) {
+  if (!config::mayUseThreadStates())
+    return;
   if (OMP_LIKELY(!TeamState.HasThreadState || !ThreadStates[TId]))
     return;
 
@@ -295,7 +309,6 @@ void state::assumeInitialState(bool IsSPMD) {
   TeamStateTy InitialTeamState;
   InitialTeamState.init(IsSPMD);
   InitialTeamState.assertEqual(TeamState);
-  ASSERT(!ThreadStates[mapping::getThreadIdInBlock()]);
   ASSERT(mapping::isSPMDMode() == IsSPMD);
 }
 


        


More information about the Openmp-commits mailing list