[Openmp-commits] [openmp] 03ff643 - [OpenMP] Put old APIs back and added new _async series for backward compatibility

Shilei Tian via Openmp-commits openmp-commits at lists.llvm.org
Thu Apr 9 19:41:03 PDT 2020


Author: Shilei Tian
Date: 2020-04-09T22:40:58-04:00
New Revision: 03ff643d2e9ebbf319d71b3a17d2ed0320a6a25b

URL: https://github.com/llvm/llvm-project/commit/03ff643d2e9ebbf319d71b3a17d2ed0320a6a25b
DIFF: https://github.com/llvm/llvm-project/commit/03ff643d2e9ebbf319d71b3a17d2ed0320a6a25b.diff

LOG: [OpenMP] Put old APIs back and added new _async series for backward compatibility

Summary: Following comments at the bi-weekly meeting, this patch puts the old APIs back and adds a new `_async` series of APIs.

Reviewers: jdoerfert

Reviewed By: jdoerfert

Subscribers: yaxunl, guansong, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D77822

Added: 
    

Modified: 
    openmp/libomptarget/include/omptargetplugin.h
    openmp/libomptarget/plugins/cuda/src/rtl.cpp
    openmp/libomptarget/plugins/exports
    openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
    openmp/libomptarget/src/device.cpp
    openmp/libomptarget/src/device.h
    openmp/libomptarget/src/interface.cpp
    openmp/libomptarget/src/omptarget.cpp
    openmp/libomptarget/src/private.h
    openmp/libomptarget/src/rtl.cpp
    openmp/libomptarget/src/rtl.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h
index b330c1935282..083e422aac16 100644
--- a/openmp/libomptarget/include/omptargetplugin.h
+++ b/openmp/libomptarget/include/omptargetplugin.h
@@ -58,21 +58,24 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
 // case an error occurred on the target device.
 void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr);
 
-// Pass the data content to the target device using the target address. If
-// AsyncInfoPtr is nullptr, it is synchronous; otherwise it is asynchronous.
-// However, AsyncInfoPtr may be ignored on some platforms, like x86_64. In that
-// case, it is synchronous. In case of success, return zero. Otherwise, return
-// an error code.
+// Pass the data content to the target device using the target address. In case
+// of success, return zero. Otherwise, return an error code.
 int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
-                              int64_t Size, __tgt_async_info *AsyncInfoPtr);
+                              int64_t Size);
 
-// Retrieve the data content from the target device using its address. If
-// AsyncInfoPtr is nullptr, it is synchronous; otherwise it is asynchronous.
-// However, AsyncInfoPtr may be ignored on some platforms, like x86_64. In that
-// case, it is synchronous. In case of success, return zero. Otherwise, return
-// an error code.
+int32_t __tgt_rtl_data_submit_async(int32_t ID, void *TargetPtr, void *HostPtr,
+                                    int64_t Size,
+                                    __tgt_async_info *AsyncInfoPtr);
+
+// Retrieve the data content from the target device using its address. In case
+// of success, return zero. Otherwise, return an error code.
 int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
-                                int64_t Size, __tgt_async_info *AsyncInfoPtr);
+                                int64_t Size);
+
+// Asynchronous version of __tgt_rtl_data_retrieve
+int32_t __tgt_rtl_data_retrieve_async(int32_t ID, void *HostPtr,
+                                      void *TargetPtr, int64_t Size,
+                                      __tgt_async_info *AsyncInfoPtr);
 
 // De-allocate the data referenced by target ptr on the device. In case of
 // success, return zero. Otherwise, return an error code.
@@ -86,8 +89,12 @@ int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr);
 // ignored on some platforms, like x86_64. In that case, it is synchronous. In
 // case of success, return zero. Otherwise, return an error code.
 int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
-                                    ptrdiff_t *Offsets, int32_t NumArgs,
-                                    __tgt_async_info *AsyncInfoPtr);
+                                    ptrdiff_t *Offsets, int32_t NumArgs);
+
+// Asynchronous version of __tgt_rtl_run_target_region
+int32_t __tgt_rtl_run_target_region_async(int32_t ID, void *Entry, void **Args,
+                                          ptrdiff_t *Offsets, int32_t NumArgs,
+                                          __tgt_async_info *AsyncInfoPtr);
 
 // Similar to __tgt_rtl_run_target_region, but additionally specify the
 // number of teams to be created and a number of threads in each team. If
@@ -97,8 +104,13 @@ int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
 int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
                                          ptrdiff_t *Offsets, int32_t NumArgs,
                                          int32_t NumTeams, int32_t ThreadLimit,
-                                         uint64_t loop_tripcount,
-                                         __tgt_async_info *AsyncInfoPtr);
+                                         uint64_t loop_tripcount);
+
+// Asynchronous version of __tgt_rtl_run_target_team_region
+int32_t __tgt_rtl_run_target_team_region_async(
+    int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs,
+    int32_t NumTeams, int32_t ThreadLimit, uint64_t loop_tripcount,
+    __tgt_async_info *AsyncInfoPtr);
 
 // Device synchronization. In case of success, return zero. Otherwise, return an
 // error code.

diff  --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index c0fb87b8e19d..1147f821b7ae 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -725,40 +725,41 @@ void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
 }
 
 int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
-                              int64_t size, __tgt_async_info *async_info_ptr) {
-  // The function dataSubmit is always asynchronous. Considering some data
-  // transfer must be synchronous, we assume if async_info_ptr is nullptr, the
-  // transfer will be synchronous by creating a temporary async info and then
-  // synchronizing after call dataSubmit; otherwise, it is asynchronous.
-  if (async_info_ptr)
-    return dataSubmit(device_id, tgt_ptr, hst_ptr, size, async_info_ptr);
-
+                              int64_t size) {
   __tgt_async_info async_info;
-  int32_t rc = dataSubmit(device_id, tgt_ptr, hst_ptr, size, &async_info);
+  int32_t rc = __tgt_rtl_data_submit_async(device_id, tgt_ptr, hst_ptr, size,
+                                           &async_info);
   if (rc != OFFLOAD_SUCCESS)
     return OFFLOAD_FAIL;
 
   return __tgt_rtl_synchronize(device_id, &async_info);
 }
 
-int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
-                                int64_t size,
-                                __tgt_async_info *async_info_ptr) {
-  // The function dataRetrieve is always asynchronous. Considering some data
-  // transfer must be synchronous, we assume if async_info_ptr is nullptr, the
-  // transfer will be synchronous by creating a temporary async info and then
-  // synchronizing after call dataRetrieve; otherwise, it is asynchronous.
-  if (async_info_ptr)
-    return dataRetrieve(device_id, hst_ptr, tgt_ptr, size, async_info_ptr);
+int32_t __tgt_rtl_data_submit_async(int32_t device_id, void *tgt_ptr,
+                                    void *hst_ptr, int64_t size,
+                                    __tgt_async_info *async_info_ptr) {
+  assert(async_info_ptr && "async_info_ptr is nullptr");
+  return dataSubmit(device_id, tgt_ptr, hst_ptr, size, async_info_ptr);
+}
 
+int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
+                                int64_t size) {
   __tgt_async_info async_info;
-  int32_t rc = dataRetrieve(device_id, hst_ptr, tgt_ptr, size, &async_info);
+  int32_t rc = __tgt_rtl_data_retrieve_async(device_id, hst_ptr, tgt_ptr, size,
+                                             &async_info);
   if (rc != OFFLOAD_SUCCESS)
     return OFFLOAD_FAIL;
 
   return __tgt_rtl_synchronize(device_id, &async_info);
 }
 
+int32_t __tgt_rtl_data_retrieve_async(int32_t device_id, void *hst_ptr,
+                                      void *tgt_ptr, int64_t size,
+                                      __tgt_async_info *async_info_ptr) {
+  assert(async_info_ptr && "async_info_ptr is nullptr");
+  return dataRetrieve(device_id, hst_ptr, tgt_ptr, size, async_info_ptr);
+}
+
 int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
   // Set the context we are using.
   CUresult err = cuCtxSetCurrent(DeviceInfo.Contexts[device_id]);
@@ -782,8 +783,22 @@ int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
                                          ptrdiff_t *tgt_offsets,
                                          int32_t arg_num, int32_t team_num,
                                          int32_t thread_limit,
-                                         uint64_t loop_tripcount,
-                                         __tgt_async_info *async_info) {
+                                         uint64_t loop_tripcount) {
+  __tgt_async_info async_info;
+  int32_t rc = __tgt_rtl_run_target_team_region_async(
+      device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num,
+      thread_limit, loop_tripcount, &async_info);
+  if (rc != OFFLOAD_SUCCESS)
+    return OFFLOAD_FAIL;
+
+  return __tgt_rtl_synchronize(device_id, &async_info);
+}
+
+int32_t __tgt_rtl_run_target_team_region_async(
+    int32_t device_id, void *tgt_entry_ptr, void **tgt_args,
+    ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num,
+    int32_t thread_limit, uint64_t loop_tripcount,
+    __tgt_async_info *async_info) {
   // Set the context we are using.
   CUresult err = cuCtxSetCurrent(DeviceInfo.Contexts[device_id]);
   if (err != CUDA_SUCCESS) {
@@ -890,21 +905,34 @@ int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
   }
 
   DP("Launch of entry point at " DPxMOD " successful!\n",
-      DPxPTR(tgt_entry_ptr));
+     DPxPTR(tgt_entry_ptr));
 
   return OFFLOAD_SUCCESS;
 }
 
 int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
                                     void **tgt_args, ptrdiff_t *tgt_offsets,
-                                    int32_t arg_num,
-                                    __tgt_async_info *async_info) {
+                                    int32_t arg_num) {
+  __tgt_async_info async_info;
+  int32_t rc = __tgt_rtl_run_target_region_async(
+      device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, &async_info);
+  if (rc != OFFLOAD_SUCCESS)
+    return OFFLOAD_FAIL;
+
+  return __tgt_rtl_synchronize(device_id, &async_info);
+}
+
+int32_t __tgt_rtl_run_target_region_async(int32_t device_id,
+                                          void *tgt_entry_ptr, void **tgt_args,
+                                          ptrdiff_t *tgt_offsets,
+                                          int32_t arg_num,
+                                          __tgt_async_info *async_info) {
   // use one team and the default number of threads.
   const int32_t team_num = 1;
   const int32_t thread_limit = 0;
-  return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
-                                          tgt_offsets, arg_num, team_num,
-                                          thread_limit, 0, async_info);
+  return __tgt_rtl_run_target_team_region_async(
+      device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num,
+      thread_limit, 0, async_info);
 }
 
 int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *async_info) {

diff  --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports
index cbbad6d0364d..a4e1a3186daa 100644
--- a/openmp/libomptarget/plugins/exports
+++ b/openmp/libomptarget/plugins/exports
@@ -7,10 +7,14 @@ VERS1.0 {
     __tgt_rtl_load_binary;
     __tgt_rtl_data_alloc;
     __tgt_rtl_data_submit;
+    __tgt_rtl_data_submit_async;
     __tgt_rtl_data_retrieve;
+    __tgt_rtl_data_retrieve_async;
     __tgt_rtl_data_delete;
     __tgt_rtl_run_target_team_region;
+    __tgt_rtl_run_target_team_region_async;
     __tgt_rtl_run_target_region;
+    __tgt_rtl_run_target_region_async;
     __tgt_rtl_synchronize;
   local:
     *;

diff  --git a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
index 84875f591ac0..8a6e085d3f75 100644
--- a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
@@ -277,13 +277,13 @@ void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
 }
 
 int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
-                              int64_t size, __tgt_async_info *) {
+                              int64_t size) {
   memcpy(tgt_ptr, hst_ptr, size);
   return OFFLOAD_SUCCESS;
 }
 
 int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
-                                int64_t size, __tgt_async_info *) {
+                                int64_t size) {
   memcpy(hst_ptr, tgt_ptr, size);
   return OFFLOAD_SUCCESS;
 }
@@ -293,11 +293,12 @@ int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
   return OFFLOAD_SUCCESS;
 }
 
-int32_t __tgt_rtl_run_target_team_region(
-    int32_t device_id, void *tgt_entry_ptr, void **tgt_args,
-    ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num,
-    int32_t thread_limit, uint64_t loop_tripcount /*not used*/,
-    __tgt_async_info *async_info /*not used*/) {
+int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
+                                         void **tgt_args,
+                                         ptrdiff_t *tgt_offsets,
+                                         int32_t arg_num, int32_t team_num,
+                                         int32_t thread_limit,
+                                         uint64_t loop_tripcount /*not used*/) {
   // ignore team num and thread limit.
 
   // Use libffi to launch execution.
@@ -331,17 +332,10 @@ int32_t __tgt_rtl_run_target_team_region(
 
 int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
                                     void **tgt_args, ptrdiff_t *tgt_offsets,
-                                    int32_t arg_num,
-                                    __tgt_async_info *async_info_ptr) {
+                                    int32_t arg_num) {
   // use one team and one thread.
   return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
-                                          tgt_offsets, arg_num, 1, 1, 0,
-                                          async_info_ptr);
-}
-
-int32_t __tgt_rtl_synchronize(int32_t device_id,
-                              __tgt_async_info *async_info_ptr) {
-  return OFFLOAD_SUCCESS;
+                                          tgt_offsets, arg_num, 1, 1, 0);
 }
 
 #ifdef __cplusplus

diff  --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 09ddcceff9ea..765dd54fe5ca 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -334,24 +334,33 @@ __tgt_target_table *DeviceTy::load_binary(void *Img) {
 // Submit data to device
 int32_t DeviceTy::data_submit(void *TgtPtrBegin, void *HstPtrBegin,
                               int64_t Size, __tgt_async_info *AsyncInfoPtr) {
-
-  return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
-                          AsyncInfoPtr);
+  if (!AsyncInfoPtr || !RTL->data_submit_async || !RTL->synchronize)
+    return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
+  else
+    return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
+                                  AsyncInfoPtr);
 }
 
 // Retrieve data from device
 int32_t DeviceTy::data_retrieve(void *HstPtrBegin, void *TgtPtrBegin,
                                 int64_t Size, __tgt_async_info *AsyncInfoPtr) {
-  return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
-                            AsyncInfoPtr);
+  if (!AsyncInfoPtr || !RTL->data_retrieve_async || !RTL->synchronize)
+    return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
+  else
+    return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
+                                    AsyncInfoPtr);
 }
 
 // Run region on device
 int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr,
                              ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
-                             __tgt_async_info *AsyncInfo) {
-  return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
-                         TgtVarsSize, AsyncInfo);
+                             __tgt_async_info *AsyncInfoPtr) {
+  if (!AsyncInfoPtr || !RTL->run_region || !RTL->synchronize)
+    return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
+                           TgtVarsSize);
+  else
+    return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
+                                 TgtOffsets, TgtVarsSize, AsyncInfoPtr);
 }
 
 // Run team region on device.
@@ -359,10 +368,15 @@ int32_t DeviceTy::run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
                                   ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
                                   int32_t NumTeams, int32_t ThreadLimit,
                                   uint64_t LoopTripCount,
-                                  __tgt_async_info *AsyncInfo) {
-  return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
-                              TgtVarsSize, NumTeams, ThreadLimit, LoopTripCount,
-                              AsyncInfo);
+                                  __tgt_async_info *AsyncInfoPtr) {
+  if (!AsyncInfoPtr || !RTL->run_team_region_async || !RTL->synchronize)
+    return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
+                                TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit,
+                                LoopTripCount);
+  else
+    return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
+                                      TgtOffsets, TgtVarsSize, NumTeams,
+                                      ThreadLimit, LoopTripCount, AsyncInfoPtr);
 }
 
 /// Check whether a device has an associated RTL and initialize it if it's not

diff  --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h
index e44adaf70e4e..a3a5767f81ff 100644
--- a/openmp/libomptarget/src/device.h
+++ b/openmp/libomptarget/src/device.h
@@ -174,8 +174,8 @@ struct DeviceTy {
   int32_t initOnce();
   __tgt_target_table *load_binary(void *Img);
 
-  // Asynchronous data transfer. When AsyncInfoPtr is nullptr, the transfer will
-  // be synchronous.
+  // Data transfer. When AsyncInfoPtr is nullptr, the transfer will be
+  // synchronous.
   int32_t data_submit(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
                       __tgt_async_info *AsyncInfoPtr);
   int32_t data_retrieve(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size,
@@ -183,11 +183,12 @@ struct DeviceTy {
 
   int32_t run_region(void *TgtEntryPtr, void **TgtVarsPtr,
                      ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
-                     __tgt_async_info *AsyncInfo);
+                     __tgt_async_info *AsyncInfoPtr);
   int32_t run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
                           ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
                           int32_t NumTeams, int32_t ThreadLimit,
-                          uint64_t LoopTripCount, __tgt_async_info *AsyncInfo);
+                          uint64_t LoopTripCount,
+                          __tgt_async_info *AsyncInfoPtr);
 
 private:
   // Call to RTL

diff  --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 8ff9d8cab1d9..924bc490b110 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -108,18 +108,18 @@ EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
     return;
   }
 
-  DeviceTy& Device = Devices[device_id];
+  DeviceTy &Device = Devices[device_id];
 
 #ifdef OMPTARGET_DEBUG
-  for (int i=0; i<arg_num; ++i) {
+  for (int i = 0; i < arg_num; ++i) {
     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
-        ", Type=0x%" PRIx64 "\n", i, DPxPTR(args_base[i]), DPxPTR(args[i]),
-        arg_sizes[i], arg_types[i]);
+       ", Type=0x%" PRIx64 "\n",
+       i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i]);
   }
 #endif
 
-  int rc = target_data_begin(Device, arg_num, args_base,
-      args, arg_sizes, arg_types);
+  int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
+                             arg_types, nullptr);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 }
 
@@ -171,8 +171,8 @@ EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
   }
 #endif
 
-  int rc = target_data_end(Device, arg_num, args_base,
-      args, arg_sizes, arg_types);
+  int rc = target_data_end(Device, arg_num, args_base, args, arg_sizes,
+                           arg_types, nullptr);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 }
 

diff  --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 4517a89726e3..3113bdc2a9d3 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -816,5 +816,8 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
     return OFFLOAD_FAIL;
   }
 
-  return Device.RTL->synchronize(device_id, &AsyncInfo);
+  if (Device.RTL->synchronize)
+    return Device.RTL->synchronize(device_id, &AsyncInfo);
+
+  return OFFLOAD_SUCCESS;
 }

diff  --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index 6e6b39f3fdca..dbc5bafbab5b 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -20,11 +20,11 @@
 extern int target_data_begin(DeviceTy &Device, int32_t arg_num,
                              void **args_base, void **args, int64_t *arg_sizes,
                              int64_t *arg_types,
-                             __tgt_async_info *async_info_ptr = nullptr);
+                             __tgt_async_info *async_info_ptr);
 
 extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
                            void **args, int64_t *arg_sizes, int64_t *arg_types,
-                           __tgt_async_info *async_info_ptr = nullptr);
+                           __tgt_async_info *async_info_ptr);
 
 extern int target_data_update(DeviceTy &Device, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);

diff  --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
index ed0be2c2ee53..1439f67e7c64 100644
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -96,43 +96,49 @@ void RTLsTy::LoadRTLs() {
     R.RTLName = Name;
 #endif
 
-    if (!(*((void**) &R.is_valid_binary) = dlsym(
-              dynlib_handle, "__tgt_rtl_is_valid_binary")))
+    if (!(*((void **)&R.is_valid_binary) =
+              dlsym(dynlib_handle, "__tgt_rtl_is_valid_binary")))
       continue;
-    if (!(*((void**) &R.number_of_devices) = dlsym(
-              dynlib_handle, "__tgt_rtl_number_of_devices")))
+    if (!(*((void **)&R.number_of_devices) =
+              dlsym(dynlib_handle, "__tgt_rtl_number_of_devices")))
       continue;
-    if (!(*((void**) &R.init_device) = dlsym(
-              dynlib_handle, "__tgt_rtl_init_device")))
+    if (!(*((void **)&R.init_device) =
+              dlsym(dynlib_handle, "__tgt_rtl_init_device")))
       continue;
-    if (!(*((void**) &R.load_binary) = dlsym(
-              dynlib_handle, "__tgt_rtl_load_binary")))
+    if (!(*((void **)&R.load_binary) =
+              dlsym(dynlib_handle, "__tgt_rtl_load_binary")))
       continue;
-    if (!(*((void**) &R.data_alloc) = dlsym(
-              dynlib_handle, "__tgt_rtl_data_alloc")))
+    if (!(*((void **)&R.data_alloc) =
+              dlsym(dynlib_handle, "__tgt_rtl_data_alloc")))
       continue;
-    if (!(*((void**) &R.data_submit) = dlsym(
-              dynlib_handle, "__tgt_rtl_data_submit")))
+    if (!(*((void **)&R.data_submit) =
+              dlsym(dynlib_handle, "__tgt_rtl_data_submit")))
       continue;
-    if (!(*((void**) &R.data_retrieve) = dlsym(
-              dynlib_handle, "__tgt_rtl_data_retrieve")))
+    if (!(*((void **)&R.data_retrieve) =
+              dlsym(dynlib_handle, "__tgt_rtl_data_retrieve")))
       continue;
-    if (!(*((void**) &R.data_delete) = dlsym(
-              dynlib_handle, "__tgt_rtl_data_delete")))
+    if (!(*((void **)&R.data_delete) =
+              dlsym(dynlib_handle, "__tgt_rtl_data_delete")))
       continue;
-    if (!(*((void**) &R.run_region) = dlsym(
-              dynlib_handle, "__tgt_rtl_run_target_region")))
+    if (!(*((void **)&R.run_region) =
+              dlsym(dynlib_handle, "__tgt_rtl_run_target_region")))
       continue;
-    if (!(*((void**) &R.run_team_region) = dlsym(
-              dynlib_handle, "__tgt_rtl_run_target_team_region")))
-      continue;
-    if (!(*((void**) &R.synchronize) = dlsym(
-              dynlib_handle, "__tgt_rtl_synchronize")))
+    if (!(*((void **)&R.run_team_region) =
+              dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region")))
       continue;
 
     // Optional functions
-    *((void**) &R.init_requires) = dlsym(
-        dynlib_handle, "__tgt_rtl_init_requires");
+    *((void **)&R.init_requires) =
+        dlsym(dynlib_handle, "__tgt_rtl_init_requires");
+    *((void **)&R.data_submit_async) =
+        dlsym(dynlib_handle, "__tgt_rtl_data_submit_async");
+    *((void **)&R.data_retrieve_async) =
+        dlsym(dynlib_handle, "__tgt_rtl_data_retrieve_async");
+    *((void **)&R.run_region_async) =
+        dlsym(dynlib_handle, "__tgt_rtl_run_target_region_async");
+    *((void **)&R.run_team_region_async) =
+        dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region_async");
+    *((void **)&R.synchronize) = dlsym(dynlib_handle, "__tgt_rtl_synchronize");
 
     // No devices are supported by this RTL?
     if (!(R.NumberOfDevices = R.number_of_devices())) {
@@ -140,8 +146,8 @@ void RTLsTy::LoadRTLs() {
       continue;
     }
 
-    DP("Registering RTL %s supporting %d devices!\n",
-        R.RTLName.c_str(), R.NumberOfDevices);
+    DP("Registering RTL %s supporting %d devices!\n", R.RTLName.c_str(),
+       R.NumberOfDevices);
 
     // The RTL is valid! Will save the information in the RTLs list.
     AllRTLs.push_back(R);

diff  --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h
index 846c89b0ed2e..86ecd6724a8d 100644
--- a/openmp/libomptarget/src/rtl.h
+++ b/openmp/libomptarget/src/rtl.h
@@ -30,16 +30,23 @@ struct RTLInfoTy {
   typedef int32_t(init_device_ty)(int32_t);
   typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
   typedef void *(data_alloc_ty)(int32_t, int64_t, void *);
-  typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t,
-                                  __tgt_async_info *);
-  typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t,
-                                    __tgt_async_info *);
+  typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t);
+  typedef int32_t(data_submit_async_ty)(int32_t, void *, void *, int64_t,
+                                        __tgt_async_info *);
+  typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t);
+  typedef int32_t(data_retrieve_async_ty)(int32_t, void *, void *, int64_t,
+                                          __tgt_async_info *);
   typedef int32_t(data_delete_ty)(int32_t, void *);
-  typedef int32_t(run_region_ty)(int32_t, void *, void **, ptrdiff_t *, int32_t,
-                                 __tgt_async_info *);
+  typedef int32_t(run_region_ty)(int32_t, void *, void **, ptrdiff_t *,
+                                 int32_t);
+  typedef int32_t(run_region_async_ty)(int32_t, void *, void **, ptrdiff_t *,
+                                       int32_t, __tgt_async_info *);
   typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *,
-                                      int32_t, int32_t, int32_t, uint64_t,
-                                      __tgt_async_info *);
+                                      int32_t, int32_t, int32_t, uint64_t);
+  typedef int32_t(run_team_region_async_ty)(int32_t, void *, void **,
+                                            ptrdiff_t *, int32_t, int32_t,
+                                            int32_t, uint64_t,
+                                            __tgt_async_info *);
   typedef int64_t(init_requires_ty)(int64_t);
   typedef int64_t(synchronize_ty)(int64_t, __tgt_async_info *);
 
@@ -62,10 +69,14 @@ struct RTLInfoTy {
   load_binary_ty *load_binary = nullptr;
   data_alloc_ty *data_alloc = nullptr;
   data_submit_ty *data_submit = nullptr;
+  data_submit_async_ty *data_submit_async = nullptr;
   data_retrieve_ty *data_retrieve = nullptr;
+  data_retrieve_async_ty *data_retrieve_async = nullptr;
   data_delete_ty *data_delete = nullptr;
   run_region_ty *run_region = nullptr;
+  run_region_async_ty *run_region_async = nullptr;
   run_team_region_ty *run_team_region = nullptr;
+  run_team_region_async_ty *run_team_region_async = nullptr;
   init_requires_ty *init_requires = nullptr;
   synchronize_ty *synchronize = nullptr;
 
@@ -94,10 +105,14 @@ struct RTLInfoTy {
     load_binary = r.load_binary;
     data_alloc = r.data_alloc;
     data_submit = r.data_submit;
+    data_submit_async = r.data_submit_async;
     data_retrieve = r.data_retrieve;
+    data_retrieve_async = r.data_retrieve_async;
     data_delete = r.data_delete;
     run_region = r.run_region;
+    run_region_async = r.run_region_async;
     run_team_region = r.run_team_region;
+    run_team_region_async = r.run_team_region_async;
     init_requires = r.init_requires;
     isUsed = r.isUsed;
     synchronize = r.synchronize;


        


More information about the Openmp-commits mailing list