[Openmp-commits] [openmp] [OpenMP][AMDGPU] Add interop support for OpenMP AMD GPU plugin (PR #88000)
via Openmp-commits
openmp-commits at lists.llvm.org
Mon Apr 8 07:34:01 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-openmp
Author: Michael Halkenhäuser (mhalk)
<details>
<summary>Changes</summary>
Add interop related functionalities for OpenMP AMD GPU plugin, including get async queue, get device reference and get backend runtime's ref ID.
Originally authored here: https://reviews.llvm.org/D137607
---
Patch is 20.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88000.diff
13 Files Affected:
- (modified) openmp/libomptarget/include/OpenMP/InteropAPI.h (+102-5)
- (modified) openmp/libomptarget/include/OpenMP/omp.h (-106)
- (modified) openmp/libomptarget/include/Shared/PluginAPI.h (+4)
- (modified) openmp/libomptarget/include/Shared/PluginAPI.inc (+1)
- (modified) openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp (+11)
- (modified) openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h (+5-1)
- (modified) openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp (+15)
- (modified) openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp (+11)
- (modified) openmp/libomptarget/src/OpenMP/InteropAPI.cpp (+26-5)
- (added) openmp/libomptarget/test/api/omp_interop_amdgpu.c (+92)
- (modified) openmp/runtime/src/include/omp.h.var (+2-1)
- (modified) openmp/runtime/src/include/omp_lib.h.var (+3-1)
- (modified) openmp/runtime/src/kmp_ftn_entry.h (+2-1)
``````````diff
diff --git a/openmp/libomptarget/include/OpenMP/InteropAPI.h b/openmp/libomptarget/include/OpenMP/InteropAPI.h
index 71c78760a32265..f686ea9bd85fa0 100644
--- a/openmp/libomptarget/include/OpenMP/InteropAPI.h
+++ b/openmp/libomptarget/include/OpenMP/InteropAPI.h
@@ -11,12 +11,72 @@
#ifndef OMPTARGET_OPENMP_INTEROP_API_H
#define OMPTARGET_OPENMP_INTEROP_API_H
-#include "omp.h"
+#define omp_interop_none 0
+#include "omp.h"
#include "omptarget.h"
extern "C" {
+/// TODO: Include the `omp.h` of the current build
+/* OpenMP 5.1 interop */
+typedef intptr_t omp_intptr_t;
+
+/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined
+ * properties */
+typedef enum omp_interop_property {
+ omp_ipr_fr_id = -1,
+ omp_ipr_fr_name = -2,
+ omp_ipr_vendor = -3,
+ omp_ipr_vendor_name = -4,
+ omp_ipr_device_num = -5,
+ omp_ipr_platform = -6,
+ omp_ipr_device = -7,
+ omp_ipr_device_context = -8,
+ omp_ipr_targetsync = -9,
+ omp_ipr_first = -9
+} omp_interop_property_t;
+
+typedef enum omp_interop_rc {
+ omp_irc_no_value = 1,
+ omp_irc_success = 0,
+ omp_irc_empty = -1,
+ omp_irc_out_of_range = -2,
+ omp_irc_type_int = -3,
+ omp_irc_type_ptr = -4,
+ omp_irc_type_str = -5,
+ omp_irc_other = -6
+} omp_interop_rc_t;
+
+typedef enum omp_interop_fr {
+ omp_ifr_cuda = 1,
+ omp_ifr_cuda_driver = 2,
+ omp_ifr_opencl = 3,
+ omp_ifr_sycl = 4,
+ omp_ifr_hip = 5,
+ omp_ifr_level_zero = 6,
+ omp_ifr_amdhsa = 7,
+ omp_ifr_last = 8
+} omp_interop_fr_t;
+
+typedef enum omp_interop_backend_type_t {
+ // reserve 0
+ omp_interop_backend_type_cuda = 1,
+ omp_interop_backend_type_amdhsa = 7,
+ omp_interop_backend_type_invalid = 8
+} omp_interop_backend_type_t;
+
+typedef enum omp_foreign_runtime_ids {
+ invalid = 0,
+ cuda = 1,
+ cuda_driver = 2,
+ opencl = 3,
+ sycl = 4,
+ hip = 5,
+ level_zero = 6,
+ amdhsa = 7
+} omp_foreign_runtime_ids_t;
+
typedef enum kmp_interop_type_t {
kmp_interop_type_unknown = -1,
kmp_interop_type_platform,
@@ -24,20 +84,57 @@ typedef enum kmp_interop_type_t {
kmp_interop_type_tasksync,
} kmp_interop_type_t;
+typedef void *omp_interop_t;
+
/// The interop value type, aka. the interop object.
typedef struct omp_interop_val_t {
/// Device and interop-type are determined at construction time and fix.
- omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type)
- : interop_type(interop_type), device_id(device_id) {}
+ omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type,
+ omp_foreign_runtime_ids_t vendor_id,
+ intptr_t backend_type_id)
+ : interop_type(interop_type), device_id(device_id), vendor_id(vendor_id),
+ backend_type_id(backend_type_id) {}
const char *err_str = nullptr;
__tgt_async_info *async_info = nullptr;
__tgt_device_info device_info;
const kmp_interop_type_t interop_type;
const intptr_t device_id;
- const omp_foreign_runtime_ids_t vendor_id = cuda;
- const intptr_t backend_type_id = omp_interop_backend_type_cuda_1;
+ omp_foreign_runtime_ids_t vendor_id;
+ intptr_t backend_type_id;
} omp_interop_val_t;
+/// Retrieves the number of implementation-defined properties available for an
+/// omp_interop_t object.
+int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t);
+
+/// Retrieves an integer property from an omp_interop_t object.
+omp_intptr_t __KAI_KMPC_CONVENTION omp_get_interop_int(const omp_interop_t,
+ omp_interop_property_t,
+ int *);
+
+/// Retrieves a pointer property from an omp_interop_t object.
+void *__KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t,
+ omp_interop_property_t, int *);
+
+/// Retrieve a string property from an omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION omp_get_interop_str(const omp_interop_t,
+ omp_interop_property_t,
+ int *);
+
+/// Retrieve a property name from an omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t,
+ omp_interop_property_t);
+
+/// Retrieve a description of the type of a property associated with an
+/// omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION
+omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
+
+/// Retrieve a description of the return code associated with an omp_interop_t
+/// object.
+extern const char *__KAI_KMPC_CONVENTION
+omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
+
} // extern "C"
#endif // OMPTARGET_OPENMP_INTEROP_API_H
diff --git a/openmp/libomptarget/include/OpenMP/omp.h b/openmp/libomptarget/include/OpenMP/omp.h
index b44c6aff1b289c..d360b5ef3b1641 100644
--- a/openmp/libomptarget/include/OpenMP/omp.h
+++ b/openmp/libomptarget/include/OpenMP/omp.h
@@ -44,112 +44,6 @@ int omp_get_default_device(void) __attribute__((weak));
///}
-/// InteropAPI
-///
-///{
-
-/// TODO: Include the `omp.h` of the current build
-/* OpenMP 5.1 interop */
-typedef intptr_t omp_intptr_t;
-
-/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined
- * properties */
-typedef enum omp_interop_property {
- omp_ipr_fr_id = -1,
- omp_ipr_fr_name = -2,
- omp_ipr_vendor = -3,
- omp_ipr_vendor_name = -4,
- omp_ipr_device_num = -5,
- omp_ipr_platform = -6,
- omp_ipr_device = -7,
- omp_ipr_device_context = -8,
- omp_ipr_targetsync = -9,
- omp_ipr_first = -9
-} omp_interop_property_t;
-
-#define omp_interop_none 0
-
-typedef enum omp_interop_rc {
- omp_irc_no_value = 1,
- omp_irc_success = 0,
- omp_irc_empty = -1,
- omp_irc_out_of_range = -2,
- omp_irc_type_int = -3,
- omp_irc_type_ptr = -4,
- omp_irc_type_str = -5,
- omp_irc_other = -6
-} omp_interop_rc_t;
-
-typedef enum omp_interop_fr {
- omp_ifr_cuda = 1,
- omp_ifr_cuda_driver = 2,
- omp_ifr_opencl = 3,
- omp_ifr_sycl = 4,
- omp_ifr_hip = 5,
- omp_ifr_level_zero = 6,
- omp_ifr_last = 7
-} omp_interop_fr_t;
-
-typedef void *omp_interop_t;
-
-/*!
- * The `omp_get_num_interop_properties` routine retrieves the number of
- * implementation-defined properties available for an `omp_interop_t` object.
- */
-int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t);
-/*!
- * The `omp_get_interop_int` routine retrieves an integer property from an
- * `omp_interop_t` object.
- */
-omp_intptr_t __KAI_KMPC_CONVENTION
-omp_get_interop_int(const omp_interop_t, omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_ptr` routine retrieves a pointer property from an
- * `omp_interop_t` object.
- */
-void *__KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t,
- omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_str` routine retrieves a string property from an
- * `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION
-omp_get_interop_str(const omp_interop_t, omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_name` routine retrieves a property name from an
- * `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t,
- omp_interop_property_t);
-/*!
- * The `omp_get_interop_type_desc` routine retrieves a description of the type
- * of a property associated with an `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION
-omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
-/*!
- * The `omp_get_interop_rc_desc` routine retrieves a description of the return
- * code associated with an `omp_interop_t` object.
- */
-extern const char *__KAI_KMPC_CONVENTION
-omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
-
-typedef enum omp_interop_backend_type_t {
- // reserve 0
- omp_interop_backend_type_cuda_1 = 1,
-} omp_interop_backend_type_t;
-
-typedef enum omp_foreign_runtime_ids {
- cuda = 1,
- cuda_driver = 2,
- opencl = 3,
- sycl = 4,
- hip = 5,
- level_zero = 6,
-} omp_foreign_runtime_ids_t;
-
-///} InteropAPI
-
} // extern "C"
#endif // OMPTARGET_OPENMP_OMP_H
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.h b/openmp/libomptarget/include/Shared/PluginAPI.h
index ecf669c774f142..c80b9d1693c10e 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.h
+++ b/openmp/libomptarget/include/Shared/PluginAPI.h
@@ -17,6 +17,7 @@
#include <cstddef>
#include <cstdint>
+#include "OpenMP/InteropAPI.h"
#include "Shared/APITypes.h"
extern "C" {
@@ -165,6 +166,9 @@ void __tgt_rtl_set_info_flag(uint32_t);
// Print the device information
void __tgt_rtl_print_device_info(int32_t ID);
+// Set the runtime related information for interop object
+int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr);
+
// Event related interfaces. It is expected to use the interfaces in the
// following way:
// 1) Create an event on the target device (__tgt_rtl_create_event).
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.inc b/openmp/libomptarget/include/Shared/PluginAPI.inc
index e445da6852f7b4..c11341d969a6db 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.inc
+++ b/openmp/libomptarget/include/Shared/PluginAPI.inc
@@ -35,6 +35,7 @@ PLUGIN_API_HANDLE(synchronize);
PLUGIN_API_HANDLE(query_async);
PLUGIN_API_HANDLE(set_info_flag);
PLUGIN_API_HANDLE(print_device_info);
+PLUGIN_API_HANDLE(set_interop_info);
PLUGIN_API_HANDLE(create_event);
PLUGIN_API_HANDLE(record_event);
PLUGIN_API_HANDLE(wait_event);
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index a0fdde951b74a7..654cce3bb39c04 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2772,6 +2772,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
bool useMultipleSdmaEngines() const { return OMPX_UseMultipleSdmaEngines; }
+ virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override {
+ InterOpPtr->vendor_id = amdhsa;
+ InterOpPtr->backend_type_id = omp_interop_backend_type_amdhsa;
+
+ __tgt_device_info *DevInfo = &InterOpPtr->device_info;
+ DevInfo->Context = nullptr;
+ DevInfo->Device = &Agent;
+
+ return Plugin::success();
+ }
+
private:
using AMDGPUEventRef = AMDGPUResourceRef<AMDGPUEventTy>;
using AMDGPUEventManagerTy = GenericDeviceResourceManagerTy<AMDGPUEventRef>;
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 79e8464bfda5c1..84159920a57309 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -19,6 +19,7 @@
#include <shared_mutex>
#include <vector>
+#include "OpenMP/InteropAPI.h"
#include "Shared/Debug.h"
#include "Shared/Environment.h"
#include "Shared/EnvironmentVar.h"
@@ -850,6 +851,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
return 0;
}
+ virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) {
+ return Error::success();
+ }
+
virtual Error getDeviceStackSize(uint64_t &V) = 0;
/// Returns true if current plugin architecture is an APU
@@ -1059,7 +1064,6 @@ struct GenericPluginTy {
/// we could not move this function into GenericDeviceTy.
virtual Expected<bool> isELFCompatible(StringRef Image) const = 0;
-protected:
/// Indicate whether a device id is valid.
bool isValidDeviceId(int32_t DeviceId) const {
return (DeviceId >= 0 && DeviceId < getNumDevices());
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index b5f3c45c835fdb..febcd8ecb756c6 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -2040,6 +2040,21 @@ int32_t __tgt_rtl_init_plugin() {
return OFFLOAD_SUCCESS;
}
+int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InterOpPtr) {
+ assert(InterOpPtr && "Interop object is allocated");
+ int32_t DevId = InterOpPtr->device_id;
+
+ assert(PluginTy::get().isValidDeviceId(DevId) && "Device Id is valid");
+ if (auto Err = PluginTy::get().getDevice(DevId).setInteropInfo(InterOpPtr)) {
+ REPORT("Failure to determine the OpenMP interop object info for Device Id "
+ "%i\n",
+ DevId);
+ return OFFLOAD_FAIL;
+ }
+
+ return OFFLOAD_SUCCESS;
+}
+
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
if (!PluginTy::isActive())
return false;
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index fc74c6aa23fddd..ad074a88035b5c 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -1143,6 +1143,17 @@ struct CUDADeviceTy : public GenericDeviceTy {
/// Returns the clock frequency for the given NVPTX device.
uint64_t getClockFrequency() const override { return 1000000000; }
+ virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override {
+ InterOpPtr->vendor_id = cuda;
+ InterOpPtr->backend_type_id = omp_interop_backend_type_cuda;
+
+ __tgt_device_info *DevInfo = &InterOpPtr->device_info;
+ DevInfo->Context = Context;
+ DevInfo->Device = Device;
+
+ return Plugin::success();
+ }
+
private:
using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;
using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;
diff --git a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
index 1a995cde7816e1..1db2addb25119c 100644
--- a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
+++ b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
@@ -70,8 +70,21 @@ const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) {
return ("hip");
case level_zero:
return ("level_zero");
+ case amdhsa:
+ return ("amdhsa");
+ default:
+ return ("unknown");
+ }
+}
+
+const char *getBackendIdToStr(intptr_t BackendId) {
+ switch (BackendId) {
+ case omp_interop_backend_type_cuda:
+ return "cuda backend";
+ case omp_interop_backend_type_amdhsa:
+ return "amdhsa backend";
}
- return ("unknown");
+ return "unknown backend";
}
template <typename PropertyTy>
@@ -105,6 +118,8 @@ const char *getProperty<const char *>(omp_interop_val_t &InteropVal,
: "device+context";
case omp_ipr_vendor_name:
return getVendorIdToStr(InteropVal.vendor_id);
+ case omp_ipr_fr_name:
+ return getBackendIdToStr(InteropVal.backend_type_id);
default:
getTypeMismatch(Property, Err);
return nullptr;
@@ -221,8 +236,11 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
NoaliasDepList);
}
- InteropPtr = new omp_interop_val_t(DeviceId, InteropType);
+ // Create interop value object
+ InteropPtr = new omp_interop_val_t(DeviceId, InteropType, invalid,
+ omp_interop_backend_type_invalid);
+ // Get an intitialized and ready device, or error
auto DeviceOrErr = PM->getDevice(DeviceId);
if (!DeviceOrErr) {
InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError());
@@ -230,12 +248,15 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
}
DeviceTy &Device = *DeviceOrErr;
- if (!Device.RTL || !Device.RTL->init_device_info ||
- Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info,
- &(InteropPtr)->err_str)) {
+ if (!Device.RTL || !Device.RTL->set_interop_info) {
delete InteropPtr;
InteropPtr = omp_interop_none;
+ return;
}
+
+ // Retrieve the target specific interop value object
+ Device.RTL->set_interop_info(InteropPtr);
+
if (InteropType == kmp_interop_type_tasksync) {
if (!Device.RTL || !Device.RTL->init_async_info ||
Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) {
diff --git a/openmp/libomptarget/test/api/omp_interop_amdgpu.c b/openmp/libomptarget/test/api/omp_interop_amdgpu.c
new file mode 100644
index 00000000000000..c66df93d44dc94
--- /dev/null
+++ b/openmp/libomptarget/test/api/omp_interop_amdgpu.c
@@ -0,0 +1,92 @@
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1
+// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
+// REQUIRES: amdgcn-amd-amdhsa
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define N 16384
+
+void vectorSet(int n, double s, double *x) {
+ for (int i = 0; i < n; ++i)
+ x[i] = s * (i + 1);
+}
+
+void vectorCopy(int n, double *x, double *y) {
+ for (int i = 0; i < n; ++i)
+ y[i] = x[i];
+}
+
+void vectorScale(int n, double s, double *x) {
+ for (int i = 0; i < n; ++i)
+ x[i] = s * x[i];
+}
+
+int main() {
+ const double ScaleFactor = 2.0;
+ double x[N], y[N];
+ omp_interop_t SyncObj = omp_interop_none;
+ int DeviceNum = omp_get_default_device();
+
+ // clang-format off
+ #pragma omp target nowait depend(out : x [0:N]) \
+ map(from : x [0:N]) device(DeviceNum)
+ // clang-format on
+ vectorSet(N, 1.0, x);
+
+#pragma omp task depend(out : y [0:N])
+ vectorSet(N, -1.0, y);
+
+ // Get SyncObject for synchronization
+ // clang-format off
+ #pragma omp interop init(targetsync : SyncObj) device(DeviceNum) \
+ depend(in : x [0:N]) depend(inout : y [0:N])
+ // clang-format on
+
+ int ForeignContextId = (int)omp_get_interop_int(SyncObj, omp_ipr_fr_id, NULL);
+ char *ForeignContextName =
+ (char *)omp_get_interop_str(SyncObj, omp_ipr_fr_name, NULL);
+
+ if (SyncObj != omp_interop_none && ForeignContextId == omp_ifr_amdhsa) {
+ printf("OpenMP working with %s runtime to execute async memcpy.\n",
+ ForeignContextName);
+ int Status;
+ omp_get_interop_ptr(SyncObj, omp_ipr_targetsync, &Status);
+
+ if (Status != omp_irc_success) {
+ fprintf(stderr, "ERROR: Failed to get %s stream, rt error = %d.\n",
+ ForeignContextName, Status);
+ if (Status == omp_irc_no_value)
+ fprintf(stderr, "Parameters valid, but no meaningful value available.");
+ exit(1);
+ }
+
+ vectorCopy(N, x, y);
+ } else {
+ // Execute as OpenMP offload
+ printf("Notice: Offloading myCopy to perform memcpy.\n");
+ // clang-format off
+ #pragma omp target depend(in : x [0:N]) depend(inout : y [0:N]) nowait \
+ map(to : x [0:N]) map(tofrom : y [0:N]) device(DeviceNum)
+ // clang-format on
+ vectorCopy(N, x, y);
+ }
+
+ // This also ensures foreign tasks complete
+#pragma omp interop destroy(SyncObj) nowait depend(out : y [0:N])
+
+#pragma omp target depend(inout : x [0:N])
+ vectorScale(N, ScaleFactor, x);
+
+#pragma omp taskwait
+
+ printf("(1 : 16384) %f:%f\n", y[0], y[N - 1]);
+ printf("(2 : 32768) %f:%f\n", x[0], x[N - 1]);
+
+ return 0;
+}
+
+// ToDo: Add meaningful checks; the following is a placeholder.
+
+// CHECK: OpenMP working with amdhsa backend runtime to execute async memcpy
diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index eb3ab7778606a3..7c60764e447163 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -192,7 +192,8 @@
omp_ifr_sycl = 4,
omp_ifr_hip = 5,
omp_ifr_level_zero = 6,
- omp_ifr_last = 7
+ omp_ifr_amdhsa = 7,
+ omp_ifr_last = 8
} omp_interop_fr_t;
typedef void * omp_interop_t;
diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var
index a709a2f298f8c8..a076890b1207b2 100644
--- a/openmp/runtime/src/include/omp_lib.h.var
+++ b/openmp/runtime/src/include/omp_lib.h.var
@@ -261,8 +261,10 @@
parameter(omp_ifr_hip=5)
integer(kind=omp_interop_fr_kind)omp_ifr_level_zero
parameter(omp_ifr_level_zero=6)
+ integer(kind=omp_interop_fr_kind)omp_ifr_amdhsa
+ parameter(omp_ifr_amdhsa=7)
integer(kind=omp_interop_fr_kind)omp_ifr...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/88000
More information about the Openmp-commits
mailing list