[Openmp-commits] [openmp] r327637 - [OpenMP][libomptarget] Enable multiple frames per global memory slot
Gheorghe-Teodor Bercea via Openmp-commits
openmp-commits at lists.llvm.org
Thu Mar 15 08:56:04 PDT 2018
Author: gbercea
Date: Thu Mar 15 08:56:04 2018
New Revision: 327637
URL: http://llvm.org/viewvc/llvm-project?rev=327637&view=rev
Log:
[OpenMP][libomptarget] Enable multiple frames per global memory slot
Summary: To save on calls to malloc, this patch enables the re-use of pre-allocated global memory slots.
Reviewers: ABataev, grokos, carlo.bertolli, caomhin
Reviewed By: grokos
Subscribers: guansong, openmp-commits
Differential Revision: https://reviews.llvm.org/D44470
Modified:
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu?rev=327637&r1=327636&r2=327637&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu Thu Mar 15 08:56:04 2018
@@ -341,7 +341,17 @@ EXTERN void __kmpc_data_sharing_init_sta
__kmpc_data_sharing_slot *RootS = teamDescr->RootS(WID);
DataSharingState.SlotPtr[WID] = RootS;
- DataSharingState.StackPtr[WID] = (void *)&RootS->Data[0];
+ DataSharingState.TailPtr[WID] = RootS;
+
+ // Initialize the stack pointer to be equal to the end of
+ // the shared memory slot. This way we ensure that the global
+ // version of the stack will be used.
+ // TODO: remove this:
+ DataSharingState.StackPtr[WID] = RootS->DataEnd;
+
+ // TODO: When the use of shared memory is enabled we will have to
+ // initialize this with the start of the Data region like so:
+ // DataSharingState.StackPtr[WID] = (void *)&RootS->Data[0];
// We initialize the list of references to arguments here.
omptarget_nvptx_globalArgs.Init();
@@ -355,12 +365,8 @@ EXTERN void __kmpc_data_sharing_init_sta
// UseSharedMemory is set to true, the runtime will attempt to use shared memory
// as long as the size requested fits the pre-allocated size.
//
-// TODO: allow more than one push per slot to save on calls to malloc.
-// Currently there is only one slot for each push so the data size in the slot
-// is the same size as the size being requested.
-//
// Called by: master, TODO: call by workers
-EXTERN void* __kmpc_data_sharing_push_stack(size_t size,
+EXTERN void* __kmpc_data_sharing_push_stack(size_t DataSize,
int16_t UseSharedMemory) {
// TODO: Add shared memory support. For now, use global memory only for
// storing the data sharing slots so ignore the pre-allocated
@@ -374,39 +380,85 @@ EXTERN void* __kmpc_data_sharing_push_st
// global memory slot.
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
__kmpc_data_sharing_slot *&TailSlotP = DataSharingState.TailPtr[WID];
+ void *&StackP = DataSharingState.StackPtr[WID];
+ void *FrameP = 0;
- // The slot for holding the data we are pushing.
- __kmpc_data_sharing_slot *NewSlot = 0;
- size_t NewSize = size;
-
- // Check if there is a next slot.
- if (__kmpc_data_sharing_slot *ExistingSlot = SlotP->Next) {
- // Attempt to re-use an existing slot provided the data fits in the slot.
- // The leftover data space will not be used.
- ptrdiff_t ExistingSlotSize = (uintptr_t)ExistingSlot->DataEnd -
- (uintptr_t)(&ExistingSlot->Data[0]);
- if (ExistingSlotSize >= NewSize)
- NewSlot = ExistingSlot;
- else
- free(ExistingSlot);
- }
+ // Check if we have room for the data in the current slot.
+ const uintptr_t StartAddress = (uintptr_t)StackP;
+ const uintptr_t EndAddress = (uintptr_t)SlotP->DataEnd;
+ const uintptr_t RequestedEndAddress = StartAddress + (uintptr_t)DataSize;
+
+ // If we requested more data than there is room for in the rest
+ // of the slot then we need to either re-use the next slot, if one exists,
+ // or create a new slot.
+ if (EndAddress < RequestedEndAddress) {
+ size_t NewSize = DataSize;
+
+ // The new or reused slot for holding the data being pushed.
+ __kmpc_data_sharing_slot *NewSlot = 0;
+
+ // Check if there is a next slot.
+ if (__kmpc_data_sharing_slot *ExistingSlot = SlotP->Next) {
+ // Attempt to reuse an existing slot provided the data fits in the slot.
+ // Any leftover space in the slot stays available for subsequent pushes.
+ ptrdiff_t ExistingSlotSize = (uintptr_t)ExistingSlot->DataEnd -
+ (uintptr_t)(&ExistingSlot->Data[0]);
+
+ // Try to add the data in the next available slot. Search for a slot
+ // with enough space.
+ while (ExistingSlotSize < NewSize) {
+ SlotP->Next = ExistingSlot->Next;
+ SlotP->Next->Prev = ExistingSlot->Prev;
+ free(ExistingSlot);
+ ExistingSlot = SlotP->Next;
+ if (!ExistingSlot)
+ break;
+ ExistingSlotSize = (uintptr_t)ExistingSlot->DataEnd -
+ (uintptr_t)(&ExistingSlot->Data[0]);
+ }
+
+ // Check if a slot has been found.
+ if (ExistingSlotSize >= NewSize) {
+ NewSlot = ExistingSlot;
+ NewSlot->PrevSlotStackPtr = StackP;
+ }
+ }
- if (!NewSlot) {
- NewSlot = (__kmpc_data_sharing_slot *)malloc(
- sizeof(__kmpc_data_sharing_slot) + NewSize);
- NewSlot->Next = 0;
- NewSlot->Prev = SlotP;
+ if (!NewSlot) {
+ // Allocate at least the default size.
+ // TODO: generalize this for workers which need a larger data slot
+ // i.e. using DS_Worker_Warp_Slot_Size.
+ if (DS_Slot_Size > DataSize)
+ NewSize = DS_Slot_Size;
+ NewSlot = (__kmpc_data_sharing_slot *)malloc(
+ sizeof(__kmpc_data_sharing_slot) + NewSize);
+ NewSlot->Next = 0;
+ NewSlot->Prev = SlotP;
+ NewSlot->PrevSlotStackPtr = StackP;
+ NewSlot->DataEnd = &NewSlot->Data[NewSize];
- // This is the last slot, save it.
- TailSlotP = NewSlot;
- }
+ // Newly allocated slots are also tail slots.
+ TailSlotP = NewSlot;
- NewSlot->DataEnd = &NewSlot->Data[NewSize];
+ // Make previous slot point to the newly allocated slot.
+ SlotP->Next = NewSlot;
+ }
- SlotP->Next = NewSlot;
- SlotP = NewSlot;
+ // The current slot becomes the new slot.
+ SlotP = NewSlot;
+ // The stack pointer always points to the next free stack frame.
+ StackP = &NewSlot->Data[DataSize];
+ // The frame pointer always points to the beginning of the frame.
+ FrameP = &NewSlot->Data[0];
+ } else {
+ // Add the data chunk to the current slot. The frame pointer is set to
+ // point to the start of the new frame held in StackP.
+ FrameP = StackP;
+ // Reset stack pointer to the requested address.
+ StackP = (void *)RequestedEndAddress;
+ }
- return (void*)&SlotP->Data[0];
+ return FrameP;
}
// TODO: add memory fence here when this function can be called by
@@ -422,26 +474,43 @@ EXTERN void* __kmpc_data_sharing_push_st
// When the pop operation removes the last global memory slot,
// reclaim all outstanding global memory slots since it is
// likely we have reached the end of the kernel.
-EXTERN void __kmpc_data_sharing_pop_stack(void *a) {
+EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) {
if (IsMasterThread()) {
unsigned WID = getWarpId();
- __kmpc_data_sharing_slot *S = DataSharingState.SlotPtr[WID];
+ __kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
+ void *&StackP = DataSharingState.StackPtr[WID];
- if (S->Prev)
- S = S->Prev;
+ // If we try to pop the last frame of the current slot we need to
+ // move to the previous slot if there is one.
+ const uintptr_t StartAddress = (uintptr_t)FrameStart;
+ if (StartAddress == (uintptr_t)&SlotP->Data[0]) {
+ if (SlotP->Prev) {
+ // Restore the stack pointer that was saved when this slot became
+ // the current slot, i.e. the next free position in the previous
+ // slot. This allows the stack pointer to be used in the computation
+ // of the remaining data space in that slot.
+ StackP = SlotP->PrevSlotStackPtr;
+ // Reset SlotP to previous slot.
+ SlotP = SlotP->Prev;
+ }
- // If this will "pop" the last global memory node then it is likely
- // that we are at the end of the data sharing region and we can
- // de-allocate any existing global memory slots.
- if (!S->Prev) {
- __kmpc_data_sharing_slot *Tail = DataSharingState.TailPtr[WID];
-
- while(Tail && Tail->Prev) {
- Tail = Tail->Prev;
- free(Tail->Next);
- Tail->Next=0;
+ // If this will "pop" the last global memory node then it is likely
+ // that we are at the end of the data sharing region and we can
+ // de-allocate any existing global memory slots.
+ if (!SlotP->Prev) {
+ __kmpc_data_sharing_slot *Tail = DataSharingState.TailPtr[WID];
+
+ while(Tail && Tail->Prev) {
+ Tail = Tail->Prev;
+ free(Tail->Next);
+ Tail->Next=0;
+ }
}
+ } else {
+ // This is not the last frame popped from this slot.
+ // Reset StackP
+ StackP = FrameStart;
}
return;
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h?rev=327637&r1=327636&r2=327637&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h Thu Mar 15 08:56:04 2018
@@ -497,6 +497,7 @@ EXTERN void __kmpc_get_shared_variables(
struct __kmpc_data_sharing_slot {
__kmpc_data_sharing_slot *Next;
__kmpc_data_sharing_slot *Prev;
+ void *PrevSlotStackPtr;
void *DataEnd;
char Data[];
};
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h?rev=327637&r1=327636&r2=327637&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h Thu Mar 15 08:56:04 2018
@@ -129,6 +129,7 @@ struct DataSharingStateTy {
struct __kmpc_data_sharing_worker_slot_static {
__kmpc_data_sharing_slot *Next;
__kmpc_data_sharing_slot *Prev;
+ void *PrevSlotStackPtr;
void *DataEnd;
char Data[DS_Worker_Warp_Slot_Size];
};
@@ -137,6 +138,7 @@ struct __kmpc_data_sharing_worker_slot_s
struct __kmpc_data_sharing_master_slot_static {
__kmpc_data_sharing_slot *Next;
__kmpc_data_sharing_slot *Prev;
+ void *PrevSlotStackPtr;
void *DataEnd;
char Data[DS_Slot_Size];
};
@@ -267,6 +269,7 @@ public:
// We currently do not have a next slot.
master_rootS[0].Next = 0;
master_rootS[0].Prev = 0;
+ master_rootS[0].PrevSlotStackPtr = 0;
return (__kmpc_data_sharing_slot *)&master_rootS[0];
}
// Initialize the pointer to the end of the slot given the size of the data
@@ -276,6 +279,7 @@ public:
// We currently do not have a next slot.
worker_rootS[wid].Next = 0;
worker_rootS[wid].Prev = 0;
+ worker_rootS[wid].PrevSlotStackPtr = 0;
return (__kmpc_data_sharing_slot *)&worker_rootS[wid];
}
More information about the Openmp-commits
mailing list