[Openmp-commits] [openmp] [OpenMP][AMDGPU] Add interop support for OpenMP AMD GPU plugin (PR #88000)

via Openmp-commits openmp-commits at lists.llvm.org
Mon Apr 8 07:34:01 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-openmp

Author: Michael Halkenhäuser (mhalk)

<details>
<summary>Changes</summary>

Add interop related functionalities for OpenMP AMD GPU plugin, including get async queue, get device reference and get backend runtime's ref ID.

Originally authored here: https://reviews.llvm.org/D137607

---

Patch is 20.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88000.diff


13 Files Affected:

- (modified) openmp/libomptarget/include/OpenMP/InteropAPI.h (+102-5) 
- (modified) openmp/libomptarget/include/OpenMP/omp.h (-106) 
- (modified) openmp/libomptarget/include/Shared/PluginAPI.h (+4) 
- (modified) openmp/libomptarget/include/Shared/PluginAPI.inc (+1) 
- (modified) openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp (+11) 
- (modified) openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h (+5-1) 
- (modified) openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp (+15) 
- (modified) openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp (+11) 
- (modified) openmp/libomptarget/src/OpenMP/InteropAPI.cpp (+26-5) 
- (added) openmp/libomptarget/test/api/omp_interop_amdgpu.c (+92) 
- (modified) openmp/runtime/src/include/omp.h.var (+2-1) 
- (modified) openmp/runtime/src/include/omp_lib.h.var (+3-1) 
- (modified) openmp/runtime/src/kmp_ftn_entry.h (+2-1) 


``````````diff
diff --git a/openmp/libomptarget/include/OpenMP/InteropAPI.h b/openmp/libomptarget/include/OpenMP/InteropAPI.h
index 71c78760a32265..f686ea9bd85fa0 100644
--- a/openmp/libomptarget/include/OpenMP/InteropAPI.h
+++ b/openmp/libomptarget/include/OpenMP/InteropAPI.h
@@ -11,12 +11,72 @@
 #ifndef OMPTARGET_OPENMP_INTEROP_API_H
 #define OMPTARGET_OPENMP_INTEROP_API_H
 
-#include "omp.h"
+#define omp_interop_none 0
 
+#include "omp.h"
 #include "omptarget.h"
 
 extern "C" {
 
+/// TODO: Include the `omp.h` of the current build
+/* OpenMP 5.1 interop */
+typedef intptr_t omp_intptr_t;
+
+/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined
+ * properties */
+typedef enum omp_interop_property {
+  omp_ipr_fr_id = -1,
+  omp_ipr_fr_name = -2,
+  omp_ipr_vendor = -3,
+  omp_ipr_vendor_name = -4,
+  omp_ipr_device_num = -5,
+  omp_ipr_platform = -6,
+  omp_ipr_device = -7,
+  omp_ipr_device_context = -8,
+  omp_ipr_targetsync = -9,
+  omp_ipr_first = -9
+} omp_interop_property_t;
+
+typedef enum omp_interop_rc {
+  omp_irc_no_value = 1,
+  omp_irc_success = 0,
+  omp_irc_empty = -1,
+  omp_irc_out_of_range = -2,
+  omp_irc_type_int = -3,
+  omp_irc_type_ptr = -4,
+  omp_irc_type_str = -5,
+  omp_irc_other = -6
+} omp_interop_rc_t;
+
+typedef enum omp_interop_fr {
+  omp_ifr_cuda = 1,
+  omp_ifr_cuda_driver = 2,
+  omp_ifr_opencl = 3,
+  omp_ifr_sycl = 4,
+  omp_ifr_hip = 5,
+  omp_ifr_level_zero = 6,
+  omp_ifr_amdhsa = 7,
+  omp_ifr_last = 8
+} omp_interop_fr_t;
+
+typedef enum omp_interop_backend_type_t {
+  // reserve 0
+  omp_interop_backend_type_cuda = 1,
+  omp_interop_backend_type_amdhsa = 7,
+  omp_interop_backend_type_invalid = 8
+} omp_interop_backend_type_t;
+
+typedef enum omp_foreign_runtime_ids {
+  invalid = 0,
+  cuda = 1,
+  cuda_driver = 2,
+  opencl = 3,
+  sycl = 4,
+  hip = 5,
+  level_zero = 6,
+  amdhsa = 7
+} omp_foreign_runtime_ids_t;
+
 typedef enum kmp_interop_type_t {
   kmp_interop_type_unknown = -1,
   kmp_interop_type_platform,
@@ -24,20 +84,57 @@ typedef enum kmp_interop_type_t {
   kmp_interop_type_tasksync,
 } kmp_interop_type_t;
 
+typedef void *omp_interop_t;
+
 /// The interop value type, aka. the interop object.
 typedef struct omp_interop_val_t {
   /// Device and interop-type are determined at construction time and fix.
-  omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type)
-      : interop_type(interop_type), device_id(device_id) {}
+  omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type,
+                    omp_foreign_runtime_ids_t vendor_id,
+                    intptr_t backend_type_id)
+      : interop_type(interop_type), device_id(device_id), vendor_id(vendor_id),
+        backend_type_id(backend_type_id) {}
   const char *err_str = nullptr;
   __tgt_async_info *async_info = nullptr;
   __tgt_device_info device_info;
   const kmp_interop_type_t interop_type;
   const intptr_t device_id;
-  const omp_foreign_runtime_ids_t vendor_id = cuda;
-  const intptr_t backend_type_id = omp_interop_backend_type_cuda_1;
+  omp_foreign_runtime_ids_t vendor_id;
+  intptr_t backend_type_id;
 } omp_interop_val_t;
 
+/// Retrieves the number of implementation-defined properties available for an
+/// omp_interop_t object.
+int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t);
+
+/// Retrieves an integer property from an omp_interop_t object.
+omp_intptr_t __KAI_KMPC_CONVENTION omp_get_interop_int(const omp_interop_t,
+                                                       omp_interop_property_t,
+                                                       int *);
+
+/// Retrieves a pointer property from an omp_interop_t object.
+void *__KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t,
+                                                omp_interop_property_t, int *);
+
+/// Retrieve a string property from an omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION omp_get_interop_str(const omp_interop_t,
+                                                      omp_interop_property_t,
+                                                      int *);
+
+/// Retrieve a property name from an omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t,
+                                                       omp_interop_property_t);
+
+/// Retrieve a description of the type of a property associated with an
+/// omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION
+omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
+
+/// Retrieve a description of the return code associated with an omp_interop_t
+/// object.
+extern const char *__KAI_KMPC_CONVENTION
+omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
+
 } // extern "C"
 
 #endif // OMPTARGET_OPENMP_INTEROP_API_H
diff --git a/openmp/libomptarget/include/OpenMP/omp.h b/openmp/libomptarget/include/OpenMP/omp.h
index b44c6aff1b289c..d360b5ef3b1641 100644
--- a/openmp/libomptarget/include/OpenMP/omp.h
+++ b/openmp/libomptarget/include/OpenMP/omp.h
@@ -44,112 +44,6 @@ int omp_get_default_device(void) __attribute__((weak));
 
 ///}
 
-/// InteropAPI
-///
-///{
-
-/// TODO: Include the `omp.h` of the current build
-/* OpenMP 5.1 interop */
-typedef intptr_t omp_intptr_t;
-
-/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined
- * properties */
-typedef enum omp_interop_property {
-  omp_ipr_fr_id = -1,
-  omp_ipr_fr_name = -2,
-  omp_ipr_vendor = -3,
-  omp_ipr_vendor_name = -4,
-  omp_ipr_device_num = -5,
-  omp_ipr_platform = -6,
-  omp_ipr_device = -7,
-  omp_ipr_device_context = -8,
-  omp_ipr_targetsync = -9,
-  omp_ipr_first = -9
-} omp_interop_property_t;
-
-#define omp_interop_none 0
-
-typedef enum omp_interop_rc {
-  omp_irc_no_value = 1,
-  omp_irc_success = 0,
-  omp_irc_empty = -1,
-  omp_irc_out_of_range = -2,
-  omp_irc_type_int = -3,
-  omp_irc_type_ptr = -4,
-  omp_irc_type_str = -5,
-  omp_irc_other = -6
-} omp_interop_rc_t;
-
-typedef enum omp_interop_fr {
-  omp_ifr_cuda = 1,
-  omp_ifr_cuda_driver = 2,
-  omp_ifr_opencl = 3,
-  omp_ifr_sycl = 4,
-  omp_ifr_hip = 5,
-  omp_ifr_level_zero = 6,
-  omp_ifr_last = 7
-} omp_interop_fr_t;
-
-typedef void *omp_interop_t;
-
-/*!
- * The `omp_get_num_interop_properties` routine retrieves the number of
- * implementation-defined properties available for an `omp_interop_t` object.
- */
-int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t);
-/*!
- * The `omp_get_interop_int` routine retrieves an integer property from an
- * `omp_interop_t` object.
- */
-omp_intptr_t __KAI_KMPC_CONVENTION
-omp_get_interop_int(const omp_interop_t, omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_ptr` routine retrieves a pointer property from an
- * `omp_interop_t` object.
- */
-void *__KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t,
-                                                omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_str` routine retrieves a string property from an
- * `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION
-omp_get_interop_str(const omp_interop_t, omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_name` routine retrieves a property name from an
- * `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t,
-                                                       omp_interop_property_t);
-/*!
- * The `omp_get_interop_type_desc` routine retrieves a description of the type
- * of a property associated with an `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION
-omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
-/*!
- * The `omp_get_interop_rc_desc` routine retrieves a description of the return
- * code associated with an `omp_interop_t` object.
- */
-extern const char *__KAI_KMPC_CONVENTION
-omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
-
-typedef enum omp_interop_backend_type_t {
-  // reserve 0
-  omp_interop_backend_type_cuda_1 = 1,
-} omp_interop_backend_type_t;
-
-typedef enum omp_foreign_runtime_ids {
-  cuda = 1,
-  cuda_driver = 2,
-  opencl = 3,
-  sycl = 4,
-  hip = 5,
-  level_zero = 6,
-} omp_foreign_runtime_ids_t;
-
-///} InteropAPI
-
 } // extern "C"
 
 #endif // OMPTARGET_OPENMP_OMP_H
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.h b/openmp/libomptarget/include/Shared/PluginAPI.h
index ecf669c774f142..c80b9d1693c10e 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.h
+++ b/openmp/libomptarget/include/Shared/PluginAPI.h
@@ -17,6 +17,7 @@
 #include <cstddef>
 #include <cstdint>
 
+#include "OpenMP/InteropAPI.h"
 #include "Shared/APITypes.h"
 
 extern "C" {
@@ -165,6 +166,9 @@ void __tgt_rtl_set_info_flag(uint32_t);
 // Print the device information
 void __tgt_rtl_print_device_info(int32_t ID);
 
+// Set the runtime related information for interop object
+int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr);
+
 // Event related interfaces. It is expected to use the interfaces in the
 // following way:
 // 1) Create an event on the target device (__tgt_rtl_create_event).
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.inc b/openmp/libomptarget/include/Shared/PluginAPI.inc
index e445da6852f7b4..c11341d969a6db 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.inc
+++ b/openmp/libomptarget/include/Shared/PluginAPI.inc
@@ -35,6 +35,7 @@ PLUGIN_API_HANDLE(synchronize);
 PLUGIN_API_HANDLE(query_async);
 PLUGIN_API_HANDLE(set_info_flag);
 PLUGIN_API_HANDLE(print_device_info);
+PLUGIN_API_HANDLE(set_interop_info);
 PLUGIN_API_HANDLE(create_event);
 PLUGIN_API_HANDLE(record_event);
 PLUGIN_API_HANDLE(wait_event);
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index a0fdde951b74a7..654cce3bb39c04 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2772,6 +2772,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
 
   bool useMultipleSdmaEngines() const { return OMPX_UseMultipleSdmaEngines; }
 
+  virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override {
+    InterOpPtr->vendor_id = amdhsa;
+    InterOpPtr->backend_type_id = omp_interop_backend_type_amdhsa;
+
+    __tgt_device_info *DevInfo = &InterOpPtr->device_info;
+    DevInfo->Context = nullptr;
+    DevInfo->Device = &Agent;
+
+    return Plugin::success();
+  }
+
 private:
   using AMDGPUEventRef = AMDGPUResourceRef<AMDGPUEventTy>;
   using AMDGPUEventManagerTy = GenericDeviceResourceManagerTy<AMDGPUEventRef>;
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 79e8464bfda5c1..84159920a57309 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -19,6 +19,7 @@
 #include <shared_mutex>
 #include <vector>
 
+#include "OpenMP/InteropAPI.h"
 #include "Shared/Debug.h"
 #include "Shared/Environment.h"
 #include "Shared/EnvironmentVar.h"
@@ -850,6 +851,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
     return 0;
   }
 
+  virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) {
+    return Error::success();
+  }
+
   virtual Error getDeviceStackSize(uint64_t &V) = 0;
 
   /// Returns true if current plugin architecture is an APU
@@ -1059,7 +1064,6 @@ struct GenericPluginTy {
   /// we could not move this function into GenericDeviceTy.
   virtual Expected<bool> isELFCompatible(StringRef Image) const = 0;
 
-protected:
   /// Indicate whether a device id is valid.
   bool isValidDeviceId(int32_t DeviceId) const {
     return (DeviceId >= 0 && DeviceId < getNumDevices());
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index b5f3c45c835fdb..febcd8ecb756c6 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -2040,6 +2040,21 @@ int32_t __tgt_rtl_init_plugin() {
   return OFFLOAD_SUCCESS;
 }
 
+int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InterOpPtr) {
+  assert(InterOpPtr && "Interop object is allocated");
+  int32_t DevId = InterOpPtr->device_id;
+
+  assert(PluginTy::get().isValidDeviceId(DevId) && "Device Id is valid");
+  if (auto Err = PluginTy::get().getDevice(DevId).setInteropInfo(InterOpPtr)) {
+    REPORT("Failure to determine the OpenMP interop object info for Device Id "
+           "%i\n",
+           DevId);
+    return OFFLOAD_FAIL;
+  }
+
+  return OFFLOAD_SUCCESS;
+}
+
 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
   if (!PluginTy::isActive())
     return false;
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index fc74c6aa23fddd..ad074a88035b5c 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -1143,6 +1143,17 @@ struct CUDADeviceTy : public GenericDeviceTy {
   /// Returns the clock frequency for the given NVPTX device.
   uint64_t getClockFrequency() const override { return 1000000000; }
 
+  virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override {
+    InterOpPtr->vendor_id = cuda;
+    InterOpPtr->backend_type_id = omp_interop_backend_type_cuda;
+
+    __tgt_device_info *DevInfo = &InterOpPtr->device_info;
+    DevInfo->Context = Context;
+    DevInfo->Device = Device;
+
+    return Plugin::success();
+  }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;
diff --git a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
index 1a995cde7816e1..1db2addb25119c 100644
--- a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
+++ b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
@@ -70,8 +70,21 @@ const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) {
     return ("hip");
   case level_zero:
     return ("level_zero");
+  case amdhsa:
+    return ("amdhsa");
+  default:
+    return ("unknown");
+  }
+}
+
+const char *getBackendIdToStr(intptr_t BackendId) {
+  switch (BackendId) {
+  case omp_interop_backend_type_cuda:
+    return "cuda backend";
+  case omp_interop_backend_type_amdhsa:
+    return "amdhsa backend";
   }
-  return ("unknown");
+  return "unknown backend";
 }
 
 template <typename PropertyTy>
@@ -105,6 +118,8 @@ const char *getProperty<const char *>(omp_interop_val_t &InteropVal,
                : "device+context";
   case omp_ipr_vendor_name:
     return getVendorIdToStr(InteropVal.vendor_id);
+  case omp_ipr_fr_name:
+    return getBackendIdToStr(InteropVal.backend_type_id);
   default:
     getTypeMismatch(Property, Err);
     return nullptr;
@@ -221,8 +236,11 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
                          NoaliasDepList);
   }
 
-  InteropPtr = new omp_interop_val_t(DeviceId, InteropType);
+  // Create interop value object
+  InteropPtr = new omp_interop_val_t(DeviceId, InteropType, invalid,
+                                     omp_interop_backend_type_invalid);
 
+  // Get an intitialized and ready device, or error
   auto DeviceOrErr = PM->getDevice(DeviceId);
   if (!DeviceOrErr) {
     InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError());
@@ -230,12 +248,15 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
   }
 
   DeviceTy &Device = *DeviceOrErr;
-  if (!Device.RTL || !Device.RTL->init_device_info ||
-      Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info,
-                                   &(InteropPtr)->err_str)) {
+  if (!Device.RTL || !Device.RTL->set_interop_info) {
     delete InteropPtr;
     InteropPtr = omp_interop_none;
+    return;
   }
+
+  // Retrieve the target specific interop value object
+  Device.RTL->set_interop_info(InteropPtr);
+
   if (InteropType == kmp_interop_type_tasksync) {
     if (!Device.RTL || !Device.RTL->init_async_info ||
         Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) {
diff --git a/openmp/libomptarget/test/api/omp_interop_amdgpu.c b/openmp/libomptarget/test/api/omp_interop_amdgpu.c
new file mode 100644
index 00000000000000..c66df93d44dc94
--- /dev/null
+++ b/openmp/libomptarget/test/api/omp_interop_amdgpu.c
@@ -0,0 +1,92 @@
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1
+// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
+// REQUIRES: amdgcn-amd-amdhsa
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define N 16384
+
+void vectorSet(int n, double s, double *x) {
+  for (int i = 0; i < n; ++i)
+    x[i] = s * (i + 1);
+}
+
+void vectorCopy(int n, double *x, double *y) {
+  for (int i = 0; i < n; ++i)
+    y[i] = x[i];
+}
+
+void vectorScale(int n, double s, double *x) {
+  for (int i = 0; i < n; ++i)
+    x[i] = s * x[i];
+}
+
+int main() {
+  const double ScaleFactor = 2.0;
+  double x[N], y[N];
+  omp_interop_t SyncObj = omp_interop_none;
+  int DeviceNum = omp_get_default_device();
+
+  // clang-format off
+  #pragma omp target nowait depend(out : x [0:N])                                \
+          map(from : x [0:N]) device(DeviceNum)
+  // clang-format on
+  vectorSet(N, 1.0, x);
+
+#pragma omp task depend(out : y [0:N])
+  vectorSet(N, -1.0, y);
+
+  // Get SyncObject for synchronization
+  // clang-format off
+  #pragma omp interop init(targetsync : SyncObj) device(DeviceNum)               \
+          depend(in : x [0:N]) depend(inout : y [0:N])
+  // clang-format on
+
+  int ForeignContextId = (int)omp_get_interop_int(SyncObj, omp_ipr_fr_id, NULL);
+  char *ForeignContextName =
+      (char *)omp_get_interop_str(SyncObj, omp_ipr_fr_name, NULL);
+
+  if (SyncObj != omp_interop_none && ForeignContextId == omp_ifr_amdhsa) {
+    printf("OpenMP working with %s runtime to execute async memcpy.\n",
+           ForeignContextName);
+    int Status;
+    omp_get_interop_ptr(SyncObj, omp_ipr_targetsync, &Status);
+
+    if (Status != omp_irc_success) {
+      fprintf(stderr, "ERROR: Failed to get %s stream, rt error = %d.\n",
+              ForeignContextName, Status);
+      if (Status == omp_irc_no_value)
+        fprintf(stderr, "Parameters valid, but no meaningful value available.");
+      exit(1);
+    }
+
+    vectorCopy(N, x, y);
+  } else {
+    // Execute as OpenMP offload
+    printf("Notice: Offloading myCopy to perform memcpy.\n");
+    // clang-format off
+  #pragma omp target depend(in : x [0:N]) depend(inout : y [0:N]) nowait         \
+          map(to : x [0:N]) map(tofrom : y [0:N]) device(DeviceNum)
+    // clang-format on
+    vectorCopy(N, x, y);
+  }
+
+  // This also ensures foreign tasks complete
+#pragma omp interop destroy(SyncObj) nowait depend(out : y [0:N])
+
+#pragma omp target depend(inout : x [0:N])
+  vectorScale(N, ScaleFactor, x);
+
+#pragma omp taskwait
+
+  printf("(1 : 16384) %f:%f\n", y[0], y[N - 1]);
+  printf("(2 : 32768) %f:%f\n", x[0], x[N - 1]);
+
+  return 0;
+}
+
+// ToDo: Add meaningful checks; the following is a placeholder.
+
+// CHECK: OpenMP working with amdhsa backend runtime to execute async memcpy
diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index eb3ab7778606a3..7c60764e447163 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -192,7 +192,8 @@
         omp_ifr_sycl = 4,
         omp_ifr_hip = 5,
         omp_ifr_level_zero = 6,
-        omp_ifr_last = 7
+        omp_ifr_amdhsa = 7,
+        omp_ifr_last = 8
     } omp_interop_fr_t;
 
     typedef void * omp_interop_t;
diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var
index a709a2f298f8c8..a076890b1207b2 100644
--- a/openmp/runtime/src/include/omp_lib.h.var
+++ b/openmp/runtime/src/include/omp_lib.h.var
@@ -261,8 +261,10 @@
       parameter(omp_ifr_hip=5)
       integer(kind=omp_interop_fr_kind)omp_ifr_level_zero
       parameter(omp_ifr_level_zero=6)
+      integer(kind=omp_interop_fr_kind)omp_ifr_amdhsa
+      parameter(omp_ifr_amdhsa=7)
       integer(kind=omp_interop_fr_kind)omp_ifr...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/88000


More information about the Openmp-commits mailing list