[Openmp-commits] [openmp] 29df4ab - [OpenMP][Offloading] Add support for event related interfaces
Shilei Tian via Openmp-commits
openmp-commits at lists.llvm.org
Sat Aug 28 13:24:19 PDT 2021
Author: Shilei Tian
Date: 2021-08-28T16:24:14-04:00
New Revision: 29df4ab3f3c9bf37529ee04795abfd90b7691857
URL: https://github.com/llvm/llvm-project/commit/29df4ab3f3c9bf37529ee04795abfd90b7691857
DIFF: https://github.com/llvm/llvm-project/commit/29df4ab3f3c9bf37529ee04795abfd90b7691857.diff
LOG: [OpenMP][Offloading] Add support for event related interfaces
This patch adds support for the event related interfaces, which will be used
later to fix data race. See D104418 for more details.
Reviewed By: jdoerfert, ye-luo
Differential Revision: https://reviews.llvm.org/D108528
Added:
Modified:
openmp/libomptarget/include/omptargetplugin.h
openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
openmp/libomptarget/plugins/cuda/src/rtl.cpp
openmp/libomptarget/plugins/exports
openmp/libomptarget/src/device.cpp
openmp/libomptarget/src/device.h
openmp/libomptarget/src/rtl.cpp
openmp/libomptarget/src/rtl.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h
index b7b3eb806981..aefad9ec25a7 100644
--- a/openmp/libomptarget/include/omptargetplugin.h
+++ b/openmp/libomptarget/include/omptargetplugin.h
@@ -145,6 +145,32 @@ void __tgt_rtl_set_info_flag(uint32_t);
// Print the device information
void __tgt_rtl_print_device_info(int32_t ID);
+// Event related interfaces. It is expected to use the interfaces in the
+// following way:
+// 1) Create an event on the target device (__tgt_rtl_create_event).
+// 2) Record the event based on the status of \p AsyncInfo->Queue at the moment
+// of function call to __tgt_rtl_record_event. An event becomes "meaningful"
+// once it is recorded, such that others can depend on it.
+// 3) Call __tgt_rtl_wait_event to set dependence on the event. Whether the
+// operation is blocking or non-blocking depends on the target. It is expected
+// to be non-blocking, just set dependence and return.
+// 4) Call __tgt_rtl_sync_event to sync the event. It is expected to block the
+// thread calling the function.
+// 5) Destroy the event (__tgt_rtl_destroy_event).
+// {
+int32_t __tgt_rtl_create_event(int32_t ID, void **Event);
+
+int32_t __tgt_rtl_record_event(int32_t ID, void *Event,
+ __tgt_async_info *AsyncInfo);
+
+int32_t __tgt_rtl_wait_event(int32_t ID, void *Event,
+ __tgt_async_info *AsyncInfo);
+
+int32_t __tgt_rtl_sync_event(int32_t ID, void *Event);
+
+int32_t __tgt_rtl_destroy_event(int32_t ID, void *Event);
+// }
+
#ifdef __cplusplus
}
#endif
diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
index fb776f7ae558..9e9cea0beb4f 100644
--- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
+++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
@@ -69,6 +69,12 @@ DLWRAP(cuMemcpyPeerAsync, 6);
DLWRAP(cuCtxGetLimit, 2);
DLWRAP(cuCtxSetLimit, 2);
+DLWRAP(cuEventCreate, 2);
+DLWRAP(cuEventRecord, 2);
+DLWRAP(cuStreamWaitEvent, 3);
+DLWRAP(cuEventSynchronize, 1);
+DLWRAP(cuEventDestroy, 1);
+
DLWRAP_FINALIZE();
#ifndef DYNAMIC_CUDA_PATH
diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
index 14049e1f7559..c6aeafef2df1 100644
--- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
+++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
@@ -22,6 +22,7 @@ typedef struct CUmod_st *CUmodule;
typedef struct CUctx_st *CUcontext;
typedef struct CUfunc_st *CUfunction;
typedef struct CUstream_st *CUstream;
+typedef struct CUevent_st *CUevent;
typedef enum cudaError_enum {
CUDA_SUCCESS = 0,
@@ -248,4 +249,10 @@ CUresult cuMemcpyPeerAsync(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext,
CUresult cuCtxGetLimit(size_t *, CUlimit);
CUresult cuCtxSetLimit(CUlimit, size_t);
+CUresult cuEventCreate(CUevent *, unsigned int);
+CUresult cuEventRecord(CUevent, CUstream);
+CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int);
+CUresult cuEventSynchronize(CUevent);
+CUresult cuEventDestroy(CUevent);
+
#endif
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 44fc67225d19..c6f51a5a57bf 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -129,6 +129,62 @@ int memcpyDtoD(const void *SrcPtr, void *DstPtr, int64_t Size,
return OFFLOAD_SUCCESS;
}
+/// Create a CUDA event with the default flags and hand it back through \p P.
+/// \p P is written only when the driver call succeeds.
+/// \returns OFFLOAD_SUCCESS on success, OFFLOAD_FAIL otherwise.
+int createEvent(void **P) {
+  CUevent NewEvent = nullptr;
+
+  const CUresult Res = cuEventCreate(&NewEvent, CU_EVENT_DEFAULT);
+  if (Res == CUDA_SUCCESS) {
+    *P = NewEvent;
+    return OFFLOAD_SUCCESS;
+  }
+
+  DP("Error when creating event event = " DPxMOD "\n", DPxPTR(NewEvent));
+  CUDA_ERR_STRING(Res);
+  return OFFLOAD_FAIL;
+}
+
+/// Record \p EventPtr on the queue held in \p AsyncInfo->Queue, which is
+/// reinterpreted as a CUstream.
+/// \returns OFFLOAD_SUCCESS on success, OFFLOAD_FAIL otherwise.
+int recordEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
+  CUevent Ev = reinterpret_cast<CUevent>(EventPtr);
+  CUstream Queue = reinterpret_cast<CUstream>(AsyncInfo->Queue);
+
+  const CUresult Res = cuEventRecord(Ev, Queue);
+  if (Res == CUDA_SUCCESS)
+    return OFFLOAD_SUCCESS;
+
+  DP("Error when recording event. stream = " DPxMOD ", event = " DPxMOD "\n",
+     DPxPTR(Queue), DPxPTR(Ev));
+  CUDA_ERR_STRING(Res);
+  return OFFLOAD_FAIL;
+}
+
+/// Call cuEventSynchronize on \p EventPtr, reinterpreted as a CUevent.
+/// \returns OFFLOAD_SUCCESS on success, OFFLOAD_FAIL otherwise.
+int syncEvent(void *EventPtr) {
+  CUevent Ev = reinterpret_cast<CUevent>(EventPtr);
+
+  const CUresult Res = cuEventSynchronize(Ev);
+  if (Res == CUDA_SUCCESS)
+    return OFFLOAD_SUCCESS;
+
+  DP("Error when syncing event = " DPxMOD "\n", DPxPTR(Ev));
+  CUDA_ERR_STRING(Res);
+  return OFFLOAD_FAIL;
+}
+
+/// Call cuEventDestroy on \p EventPtr, reinterpreted as a CUevent.
+/// \returns OFFLOAD_SUCCESS on success, OFFLOAD_FAIL otherwise.
+int destroyEvent(void *EventPtr) {
+  CUevent Ev = reinterpret_cast<CUevent>(EventPtr);
+
+  const CUresult Res = cuEventDestroy(Ev);
+  if (Res == CUDA_SUCCESS)
+    return OFFLOAD_SUCCESS;
+
+  DP("Error when destroying event = " DPxMOD "\n", DPxPTR(Ev));
+  CUDA_ERR_STRING(Res);
+  return OFFLOAD_FAIL;
+}
+
// Structure contains per-device data
struct DeviceDataTy {
/// List that contains all the kernels.
@@ -1332,6 +1388,25 @@ class DeviceRTLTy {
"Error returned from cuDeviceGetAttribute\n");
printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2);
}
+
+  /// Issue cuStreamWaitEvent for \p EventPtr on the stream that getStream
+  /// returns for \p DeviceId and \p AsyncInfo.
+  /// \returns OFFLOAD_SUCCESS on success, OFFLOAD_FAIL otherwise.
+  int waitEvent(const int DeviceId, __tgt_async_info *AsyncInfo,
+                void *EventPtr) const {
+    CUevent Ev = reinterpret_cast<CUevent>(EventPtr);
+    CUstream Queue = getStream(DeviceId, AsyncInfo);
+
+    // We don't use CU_EVENT_WAIT_DEFAULT here as it is only available from
+    // specific CUDA version, and defined as 0x0. In previous version, per CUDA
+    // API document, that argument has to be 0x0.
+    const CUresult Res = cuStreamWaitEvent(Queue, Ev, 0);
+    if (Res == CUDA_SUCCESS)
+      return OFFLOAD_SUCCESS;
+
+    DP("Error when waiting event. stream = " DPxMOD ", event = " DPxMOD "\n",
+       DPxPTR(Queue), DPxPTR(Ev));
+    CUDA_ERR_STRING(Res);
+    return OFFLOAD_FAIL;
+  }
};
DeviceRTLTy DeviceRTL;
@@ -1537,6 +1612,41 @@ void __tgt_rtl_print_device_info(int32_t device_id) {
DeviceRTL.printDeviceInfo(device_id);
}
+// Plugin entry point for event creation. Asserts that the output slot
+// `event` is non-null and returns the result of createEvent. `device_id` is
+// not used by this implementation.
+int32_t __tgt_rtl_create_event(int32_t device_id, void **event) {
+ assert(event && "event is nullptr");
+ return createEvent(event);
+}
+
+// Plugin entry point for recording an event. Returns the result of
+// recordEvent. `device_id` is not used by this implementation.
+int32_t __tgt_rtl_record_event(int32_t device_id, void *event_ptr,
+ __tgt_async_info *async_info_ptr) {
+ assert(async_info_ptr && "async_info_ptr is nullptr");
+ // recordEvent reads async_info_ptr->Queue directly, so the queue must
+ // already be set when this is called.
+ assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr");
+ assert(event_ptr && "event_ptr is nullptr");
+
+ return recordEvent(event_ptr, async_info_ptr);
+}
+
+// Plugin entry point for waiting on an event. Asserts that `device_id` is
+// valid and that both pointers are non-null, then returns the result of
+// DeviceRTL.waitEvent.
+// NOTE(review): unlike __tgt_rtl_record_event, the queue inside
+// `async_info_ptr` is not asserted here; presumably getStream supplies one
+// when it is missing -- confirm against getStream.
+int32_t __tgt_rtl_wait_event(int32_t device_id, void *event_ptr,
+ __tgt_async_info *async_info_ptr) {
+ assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
+ assert(async_info_ptr && "async_info_ptr is nullptr");
+ assert(event_ptr && "event is nullptr");
+
+ return DeviceRTL.waitEvent(device_id, async_info_ptr, event_ptr);
+}
+
+// Plugin entry point for synchronizing on an event. Asserts that `event_ptr`
+// is non-null and returns the result of syncEvent. `device_id` is not used
+// by this implementation.
+int32_t __tgt_rtl_sync_event(int32_t device_id, void *event_ptr) {
+ assert(event_ptr && "event is nullptr");
+
+ return syncEvent(event_ptr);
+}
+
+// Plugin entry point for destroying an event. Asserts that `event_ptr` is
+// non-null and returns the result of destroyEvent. `device_id` is not used
+// by this implementation.
+int32_t __tgt_rtl_destroy_event(int32_t device_id, void *event_ptr) {
+ assert(event_ptr && "event is nullptr");
+
+ return destroyEvent(event_ptr);
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports
index 61cc6746defd..0a3dc8a82c2e 100644
--- a/openmp/libomptarget/plugins/exports
+++ b/openmp/libomptarget/plugins/exports
@@ -24,6 +24,11 @@ VERS1.0 {
__tgt_rtl_supports_empty_images;
__tgt_rtl_set_info_flag;
__tgt_rtl_print_device_info;
+ __tgt_rtl_create_event;
+ __tgt_rtl_record_event;
+ __tgt_rtl_wait_event;
+ __tgt_rtl_sync_event;
+ __tgt_rtl_destroy_event;
local:
*;
};
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index f660d2321dfb..fd7c73df722c 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -553,6 +553,41 @@ int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) {
return OFFLOAD_SUCCESS;
}
+/// Forward event creation to the plugin when it provides create_event.
+/// When the plugin has no such entry point this returns OFFLOAD_SUCCESS
+/// without touching \p Event.
+int32_t DeviceTy::createEvent(void **Event) {
+  if (!RTL->create_event)
+    return OFFLOAD_SUCCESS;
+
+  return RTL->create_event(RTLDeviceID, Event);
+}
+
+/// Forward event recording to the plugin when it provides record_event.
+/// When the plugin has no such entry point this returns OFFLOAD_SUCCESS.
+int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) {
+  if (!RTL->record_event)
+    return OFFLOAD_SUCCESS;
+
+  return RTL->record_event(RTLDeviceID, Event, AsyncInfo);
+}
+
+/// Forward the wait request to the plugin when it provides wait_event.
+/// When the plugin has no such entry point this returns OFFLOAD_SUCCESS.
+int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) {
+  if (!RTL->wait_event)
+    return OFFLOAD_SUCCESS;
+
+  return RTL->wait_event(RTLDeviceID, Event, AsyncInfo);
+}
+
+/// Forward event synchronization to the plugin when it provides sync_event.
+/// When the plugin has no such entry point this returns OFFLOAD_SUCCESS.
+int32_t DeviceTy::syncEvent(void *Event) {
+  if (!RTL->sync_event)
+    return OFFLOAD_SUCCESS;
+
+  return RTL->sync_event(RTLDeviceID, Event);
+}
+
+/// Forward event destruction to the plugin when it provides destroy_event.
+/// When the plugin has no such entry point this returns OFFLOAD_SUCCESS.
+int32_t DeviceTy::destroyEvent(void *Event) {
+  // Guard on the entry point that is actually invoked. Testing create_event
+  // here would call through a null destroy_event for a plugin that exports
+  // only create_event.
+  if (RTL->destroy_event)
+    return RTL->destroy_event(RTLDeviceID, Event);
+
+  return OFFLOAD_SUCCESS;
+}
+
/// Check whether a device has an associated RTL and initialize it if it's not
/// already initialized.
bool device_is_ready(int device_num) {
diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h
index 21cce3539349..58c6316ff6c3 100644
--- a/openmp/libomptarget/src/device.h
+++ b/openmp/libomptarget/src/device.h
@@ -275,10 +275,32 @@ struct DeviceTy {
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
int32_t synchronize(AsyncInfoTy &AsyncInfo);
- /// Calls the corresponding print in the \p RTLDEVID
+ /// Calls the corresponding print in the \p RTLDEVID
/// device RTL to obtain the information of the specific device.
bool printDeviceInfo(int32_t RTLDevID);
+ /// Event related interfaces.
+ /// {
+ /// Create an event.
+ int32_t createEvent(void **Event);
+
+ /// Record the event based on status in AsyncInfo->Queue at the moment the
+ /// function is called.
+ int32_t recordEvent(void *Event, AsyncInfoTy &AsyncInfo);
+
+ /// Wait for an event. This function can be blocking or non-blocking,
+ /// depending on the implementation. It is expected to set a dependence on the
+ /// event such that corresponding operations shall only start once the event
+ /// is fulfilled.
+ int32_t waitEvent(void *Event, AsyncInfoTy &AsyncInfo);
+
+ /// Synchronize the event. It is expected to block the thread.
+ int32_t syncEvent(void *Event);
+
+ /// Destroy the event.
+ int32_t destroyEvent(void *Event);
+ /// }
+
private:
// Call to RTL
void init(); // To be called only via DeviceTy::initOnce()
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
index 47d3a0f544db..264b1d4f7d33 100644
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -183,6 +183,14 @@ void RTLsTy::LoadRTLs() {
dlsym(dynlib_handle, "__tgt_rtl_set_info_flag");
*((void **)&R.print_device_info) =
dlsym(dynlib_handle, "__tgt_rtl_print_device_info");
+ *((void **)&R.create_event) =
+ dlsym(dynlib_handle, "__tgt_rtl_create_event");
+ *((void **)&R.record_event) =
+ dlsym(dynlib_handle, "__tgt_rtl_record_event");
+ *((void **)&R.wait_event) = dlsym(dynlib_handle, "__tgt_rtl_wait_event");
+ *((void **)&R.sync_event) = dlsym(dynlib_handle, "__tgt_rtl_sync_event");
+ *((void **)&R.destroy_event) =
+ dlsym(dynlib_handle, "__tgt_rtl_destroy_event");
}
#if OMPT_SUPPORT
diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h
index db13927cff53..88328d23a528 100644
--- a/openmp/libomptarget/src/rtl.h
+++ b/openmp/libomptarget/src/rtl.h
@@ -57,6 +57,11 @@ struct RTLInfoTy {
typedef int32_t(supports_empty_images_ty)();
typedef void(print_device_info_ty)(int32_t);
typedef void(set_info_flag_ty)(uint32_t);
+ typedef int32_t(create_event_ty)(int32_t, void **);
+ typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *);
+ typedef int32_t(wait_event_ty)(int32_t, void *, __tgt_async_info *);
+ typedef int32_t(sync_event_ty)(int32_t, void *);
+ typedef int32_t(destroy_event_ty)(int32_t, void *);
int32_t Idx = -1; // RTL index, index is the number of devices
// of other RTLs that were registered before,
@@ -95,6 +100,11 @@ struct RTLInfoTy {
supports_empty_images_ty *supports_empty_images = nullptr;
set_info_flag_ty *set_info_flag = nullptr;
print_device_info_ty *print_device_info = nullptr;
+ create_event_ty *create_event = nullptr;
+ record_event_ty *record_event = nullptr;
+ wait_event_ty *wait_event = nullptr;
+ sync_event_ty *sync_event = nullptr;
+ destroy_event_ty *destroy_event = nullptr;
// Are there images associated with this RTL.
bool isUsed = false;
More information about the Openmp-commits
mailing list