[Openmp-commits] [clang] [llvm] [openmp] [OpenMP][offload] Fix dynamic schedule tracking (PR #97065)
Gheorghe-Teodor Bercea via Openmp-commits
openmp-commits at lists.llvm.org
Fri Jun 28 10:24:44 PDT 2024
================
@@ -444,32 +444,81 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
// KMP interface implementation (dyn loops)
////////////////////////////////////////////////////////////////////////////////
-// TODO: This is a stopgap. We probably want to expand the dispatch API to take
-// an DST pointer which can then be allocated properly without malloc.
-static DynamicScheduleTracker *THREAD_LOCAL(ThreadDSTPtr);
+// TODO: Expand the dispatch API to take a DST pointer which can then be
+// allocated properly without malloc.
+// For now, each team holds an LDS pointer (ThreadDST) to a global-memory array
+// of pointers, one per thread in the team, to the DST structs allocated (also
+// in global memory) for those threads. The array is allocated during the init
+// phase if it has not been allocated already, and is deallocated when the
+// dispatch phase ends:
+//
+// __kmpc_dispatch_init
+//
+// ** Dispatch loop **
+//
+// __kmpc_dispatch_deinit
+//
+static DynamicScheduleTracker **SHARED(ThreadDST);
// Create a new DST, link the current one, and define the new as current.
static DynamicScheduleTracker *pushDST() {
+ int32_t ThreadIndex = mapping::getThreadIdInBlock();
+ // Each block will allocate an array of pointers to DST structs. The array is
+ // equal in length to the number of threads in that block.
+ if (!ThreadDST) {
+ // Allocate global memory array of pointers to DST structs:
+ if (ThreadIndex == 0)
+ ThreadDST = static_cast<DynamicScheduleTracker **>(
+ memory::allocGlobal(mapping::getNumberOfThreadsInBlock() *
+ sizeof(DynamicScheduleTracker *),
+ "new ThreadDST array"));
+ synchronize::threads(atomic::seq_cst);
+
+ // Initialize the array pointers:
+ ThreadDST[ThreadIndex] = nullptr;
+ }
+
+ // Create a DST struct for the current thread:
DynamicScheduleTracker *NewDST = static_cast<DynamicScheduleTracker *>(
memory::allocGlobal(sizeof(DynamicScheduleTracker), "new DST"));
*NewDST = DynamicScheduleTracker({0});
- NewDST->NextDST = ThreadDSTPtr;
- ThreadDSTPtr = NewDST;
- return ThreadDSTPtr;
+
+ // Add the new DST struct to the array of DST structs:
+ NewDST->NextDST = ThreadDST[ThreadIndex];
+ ThreadDST[ThreadIndex] = NewDST;
+ return NewDST;
}
// Return the current DST.
-static DynamicScheduleTracker *peekDST() { return ThreadDSTPtr; }
+static DynamicScheduleTracker *peekDST() {
+ return ThreadDST[mapping::getThreadIdInBlock()];
+}
// Pop the current DST and restore the last one.
static void popDST() {
- DynamicScheduleTracker *OldDST = ThreadDSTPtr->NextDST;
- memory::freeGlobal(ThreadDSTPtr, "remove DST");
- ThreadDSTPtr = OldDST;
+ int32_t ThreadIndex = mapping::getThreadIdInBlock();
+ DynamicScheduleTracker *CurrentDST = ThreadDST[ThreadIndex];
+ DynamicScheduleTracker *OldDST = CurrentDST->NextDST;
+ memory::freeGlobal(CurrentDST, "remove DST");
+ ThreadDST[ThreadIndex] = OldDST;
+
+ // Check if we need to deallocate the global array. Ensure all threads
+ // in the block have finished deallocating the individual DSTs.
+ synchronize::threads(atomic::seq_cst);
+ if (ThreadDST[ThreadIndex] == 0 && ThreadIndex == 0)
+ memory::freeGlobal(ThreadDST, "remove ThreadDST array");
+ synchronize::threads(atomic::seq_cst);
----------------
doru1004 wrote:
For performance reasons?
https://github.com/llvm/llvm-project/pull/97065
More information about the Openmp-commits mailing list