[Openmp-commits] [PATCH] D135037: [OpenMP] Eliminate the ThreadStates array in favor of indirection

Johannes Doerfert via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Sun Oct 2 09:50:08 PDT 2022


jdoerfert created this revision.
jdoerfert added reviewers: jhuber6, tianshilei1992.
Herald added subscribers: guansong, bollu, yaxunl.
Herald added a project: All.
jdoerfert requested review of this revision.
Herald added a subscriber: sstefan1.
Herald added a project: OpenMP.

If we have thread states, the program is going to be rather slow. If we
don't we want to avoid wasting shared memory. This patch introduces a
slight penalty (malloc + indirection) for the slow path and reduces
resource usage for the fast path.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D135037

Files:
  openmp/libomptarget/DeviceRTL/include/State.h
  openmp/libomptarget/DeviceRTL/include/Types.h
  openmp/libomptarget/DeviceRTL/src/State.cpp


Index: openmp/libomptarget/DeviceRTL/src/State.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/State.cpp
+++ openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -12,6 +12,7 @@
 #include "Configuration.h"
 #include "Debug.h"
 #include "Interface.h"
+#include "Mapping.h"
 #include "Synchronization.h"
 #include "Types.h"
 
@@ -220,10 +221,7 @@
 }
 
 state::TeamStateTy SHARED(_OMP::state::TeamState);
-
-__attribute__((loader_uninitialized))
-state::ThreadStateTy *_OMP::state::ThreadStates[mapping::MaxThreadsPerTeam];
-#pragma omp allocate(_OMP::state::ThreadStates) allocator(omp_pteam_mem_alloc)
+state::ThreadStateTy **SHARED(_OMP::state::ThreadStates);
 
 namespace {
 
@@ -247,9 +245,8 @@
   if (mapping::isInitialThreadInLevel0(IsSPMD)) {
     TeamState.init(IsSPMD);
     DebugEntryRAII::init();
+    ThreadStates = nullptr;
   }
-
-  ThreadStates[mapping::getThreadIdInBlock()] = nullptr;
 }
 
 void state::enterDataEnvironment(IdentTy *Ident) {
@@ -259,6 +256,17 @@
   unsigned TId = mapping::getThreadIdInBlock();
   ThreadStateTy *NewThreadState =
       static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
+  uintptr_t *ThreadStatesBitsPtr = reinterpret_cast<uintptr_t *>(ThreadStates);
+  if (!atomic::load(ThreadStatesBitsPtr, atomic::seq_cst)) {
+    void *ThreadStatesPtr =
+        memory::allocGlobal(sizeof(ThreadStates[0]) * mapping::getBlockSize(),
+                            "Thread state array allocation");
+    if (!atomic::cas(ThreadStatesBitsPtr, uintptr_t(0),
+                     reinterpret_cast<uintptr_t>(ThreadStatesPtr),
+                     atomic::seq_cst, atomic::seq_cst))
+      memory::freeGlobal(ThreadStatesPtr,
+                         "Thread state array allocated multiple times");
+  }
   NewThreadState->init(ThreadStates[TId]);
   TeamState.HasThreadState = true;
   ThreadStates[TId] = NewThreadState;
@@ -294,7 +302,6 @@
   TeamStateTy InitialTeamState;
   InitialTeamState.init(IsSPMD);
   InitialTeamState.assertEqual(TeamState);
-  ASSERT(!ThreadStates[mapping::getThreadIdInBlock()]);
   ASSERT(mapping::isSPMDMode() == IsSPMD);
 }
 
Index: openmp/libomptarget/DeviceRTL/include/Types.h
===================================================================
--- openmp/libomptarget/DeviceRTL/include/Types.h
+++ openmp/libomptarget/DeviceRTL/include/Types.h
@@ -33,6 +33,9 @@
 using int64_t = long;
 using uint64_t = unsigned long;
 using size_t = decltype(sizeof(char));
+// TODO: Properly implement this
+using intptr_t = int64_t;
+using uintptr_t = uint64_t;
 
 static_assert(sizeof(int8_t) == 1, "type size mismatch");
 static_assert(sizeof(uint8_t) == 1, "type size mismatch");
Index: openmp/libomptarget/DeviceRTL/include/State.h
===================================================================
--- openmp/libomptarget/DeviceRTL/include/State.h
+++ openmp/libomptarget/DeviceRTL/include/State.h
@@ -109,7 +109,7 @@
   }
 };
 
-extern ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
+extern ThreadStateTy **ThreadStates;
 #pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
 
 /// Initialize the state machinery. Must be called by all threads.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D135037.464561.patch
Type: text/x-patch
Size: 3211 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20221002/c68ab71a/attachment-0001.bin>


More information about the Openmp-commits mailing list