[Openmp-commits] [openmp] [OpenMP][AMDGPU] Add interop support for OpenMP AMD GPU plugin (PR #88000)
Michael Halkenhäuser via Openmp-commits
openmp-commits at lists.llvm.org
Mon Apr 8 07:33:41 PDT 2024
https://github.com/mhalk created https://github.com/llvm/llvm-project/pull/88000
Add interop related functionalities for OpenMP AMD GPU plugin, including get async queue, get device reference and get backend runtime's ref ID.
Originally authored here: https://reviews.llvm.org/D137607
>From f2a4e524a2be3384d940a4ae86510a8a18e22ea0 Mon Sep 17 00:00:00 2001
From: Jisheng Zhao <jishengz at gmail.com>
Date: Wed, 28 Jun 2023 04:54:51 -0400
Subject: [PATCH] [OpenMP][AMDGPU] Add interop support for OpenMP AMD GPU
plugin
Add interop related functionalities for OpenMP AMD GPU plugin, including get
async queue, get device reference and get backend runtime's ref ID.
Originally authored here: https://reviews.llvm.org/D137607
Co-authored-by: JP Lehr <JanPatrick.Lehr at amd.com>
Co-authored-by: Michael Halkenhaeuser <MichaelGerald.Halkenhauser at amd.com>
---
.../libomptarget/include/OpenMP/InteropAPI.h | 107 +++++++++++++++++-
openmp/libomptarget/include/OpenMP/omp.h | 106 -----------------
.../libomptarget/include/Shared/PluginAPI.h | 4 +
.../libomptarget/include/Shared/PluginAPI.inc | 1 +
.../plugins-nextgen/amdgpu/src/rtl.cpp | 11 ++
.../common/include/PluginInterface.h | 6 +-
.../common/src/PluginInterface.cpp | 15 +++
.../plugins-nextgen/cuda/src/rtl.cpp | 11 ++
openmp/libomptarget/src/OpenMP/InteropAPI.cpp | 31 ++++-
.../test/api/omp_interop_amdgpu.c | 92 +++++++++++++++
openmp/runtime/src/include/omp.h.var | 3 +-
openmp/runtime/src/include/omp_lib.h.var | 4 +-
openmp/runtime/src/kmp_ftn_entry.h | 3 +-
13 files changed, 274 insertions(+), 120 deletions(-)
create mode 100644 openmp/libomptarget/test/api/omp_interop_amdgpu.c
diff --git a/openmp/libomptarget/include/OpenMP/InteropAPI.h b/openmp/libomptarget/include/OpenMP/InteropAPI.h
index 71c78760a32265..f686ea9bd85fa0 100644
--- a/openmp/libomptarget/include/OpenMP/InteropAPI.h
+++ b/openmp/libomptarget/include/OpenMP/InteropAPI.h
@@ -11,12 +11,72 @@
#ifndef OMPTARGET_OPENMP_INTEROP_API_H
#define OMPTARGET_OPENMP_INTEROP_API_H
-#include "omp.h"
+#define omp_interop_none 0
+#include "omp.h"
#include "omptarget.h"
extern "C" {
+/// TODO: Include the `omp.h` of the current build
+/* OpenMP 5.1 interop */
+typedef intptr_t omp_intptr_t;
+
+/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined
+ * properties */
+typedef enum omp_interop_property {
+ omp_ipr_fr_id = -1,
+ omp_ipr_fr_name = -2,
+ omp_ipr_vendor = -3,
+ omp_ipr_vendor_name = -4,
+ omp_ipr_device_num = -5,
+ omp_ipr_platform = -6,
+ omp_ipr_device = -7,
+ omp_ipr_device_context = -8,
+ omp_ipr_targetsync = -9,
+ omp_ipr_first = -9
+} omp_interop_property_t;
+
+typedef enum omp_interop_rc {
+ omp_irc_no_value = 1,
+ omp_irc_success = 0,
+ omp_irc_empty = -1,
+ omp_irc_out_of_range = -2,
+ omp_irc_type_int = -3,
+ omp_irc_type_ptr = -4,
+ omp_irc_type_str = -5,
+ omp_irc_other = -6
+} omp_interop_rc_t;
+
+typedef enum omp_interop_fr {
+ omp_ifr_cuda = 1,
+ omp_ifr_cuda_driver = 2,
+ omp_ifr_opencl = 3,
+ omp_ifr_sycl = 4,
+ omp_ifr_hip = 5,
+ omp_ifr_level_zero = 6,
+ omp_ifr_amdhsa = 7,
+ omp_ifr_last = 8
+} omp_interop_fr_t;
+
+typedef enum omp_interop_backend_type_t {
+ // reserve 0
+ omp_interop_backend_type_cuda = 1,
+ omp_interop_backend_type_amdhsa = 7,
+ omp_interop_backend_type_invalid = 8
+} omp_interop_backend_type_t;
+
+typedef enum omp_foreign_runtime_ids {
+ invalid = 0,
+ cuda = 1,
+ cuda_driver = 2,
+ opencl = 3,
+ sycl = 4,
+ hip = 5,
+ level_zero = 6,
+ amdhsa = 7
+} omp_foreign_runtime_ids_t;
+
typedef enum kmp_interop_type_t {
kmp_interop_type_unknown = -1,
kmp_interop_type_platform,
@@ -24,20 +84,57 @@ typedef enum kmp_interop_type_t {
kmp_interop_type_tasksync,
} kmp_interop_type_t;
+typedef void *omp_interop_t;
+
/// The interop value type, aka. the interop object.
typedef struct omp_interop_val_t {
/// Device and interop-type are determined at construction time and fix.
- omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type)
- : interop_type(interop_type), device_id(device_id) {}
+ omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type,
+ omp_foreign_runtime_ids_t vendor_id,
+ intptr_t backend_type_id)
+ : interop_type(interop_type), device_id(device_id), vendor_id(vendor_id),
+ backend_type_id(backend_type_id) {}
const char *err_str = nullptr;
__tgt_async_info *async_info = nullptr;
__tgt_device_info device_info;
const kmp_interop_type_t interop_type;
const intptr_t device_id;
- const omp_foreign_runtime_ids_t vendor_id = cuda;
- const intptr_t backend_type_id = omp_interop_backend_type_cuda_1;
+ omp_foreign_runtime_ids_t vendor_id;
+ intptr_t backend_type_id;
} omp_interop_val_t;
+/// Retrieves the number of implementation-defined properties available for an
+/// omp_interop_t object.
+int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t);
+
+/// Retrieves an integer property from an omp_interop_t object.
+omp_intptr_t __KAI_KMPC_CONVENTION omp_get_interop_int(const omp_interop_t,
+ omp_interop_property_t,
+ int *);
+
+/// Retrieves a pointer property from an omp_interop_t object.
+void *__KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t,
+ omp_interop_property_t, int *);
+
+/// Retrieve a string property from an omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION omp_get_interop_str(const omp_interop_t,
+ omp_interop_property_t,
+ int *);
+
+/// Retrieve a property name from an omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t,
+ omp_interop_property_t);
+
+/// Retrieve a description of the type of a property associated with an
+/// omp_interop_t object.
+const char *__KAI_KMPC_CONVENTION
+omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
+
+/// Retrieve a description of the return code associated with an omp_interop_t
+/// object.
+extern const char *__KAI_KMPC_CONVENTION
+omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
+
} // extern "C"
#endif // OMPTARGET_OPENMP_INTEROP_API_H
diff --git a/openmp/libomptarget/include/OpenMP/omp.h b/openmp/libomptarget/include/OpenMP/omp.h
index b44c6aff1b289c..d360b5ef3b1641 100644
--- a/openmp/libomptarget/include/OpenMP/omp.h
+++ b/openmp/libomptarget/include/OpenMP/omp.h
@@ -44,112 +44,6 @@ int omp_get_default_device(void) __attribute__((weak));
///}
-/// InteropAPI
-///
-///{
-
-/// TODO: Include the `omp.h` of the current build
-/* OpenMP 5.1 interop */
-typedef intptr_t omp_intptr_t;
-
-/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined
- * properties */
-typedef enum omp_interop_property {
- omp_ipr_fr_id = -1,
- omp_ipr_fr_name = -2,
- omp_ipr_vendor = -3,
- omp_ipr_vendor_name = -4,
- omp_ipr_device_num = -5,
- omp_ipr_platform = -6,
- omp_ipr_device = -7,
- omp_ipr_device_context = -8,
- omp_ipr_targetsync = -9,
- omp_ipr_first = -9
-} omp_interop_property_t;
-
-#define omp_interop_none 0
-
-typedef enum omp_interop_rc {
- omp_irc_no_value = 1,
- omp_irc_success = 0,
- omp_irc_empty = -1,
- omp_irc_out_of_range = -2,
- omp_irc_type_int = -3,
- omp_irc_type_ptr = -4,
- omp_irc_type_str = -5,
- omp_irc_other = -6
-} omp_interop_rc_t;
-
-typedef enum omp_interop_fr {
- omp_ifr_cuda = 1,
- omp_ifr_cuda_driver = 2,
- omp_ifr_opencl = 3,
- omp_ifr_sycl = 4,
- omp_ifr_hip = 5,
- omp_ifr_level_zero = 6,
- omp_ifr_last = 7
-} omp_interop_fr_t;
-
-typedef void *omp_interop_t;
-
-/*!
- * The `omp_get_num_interop_properties` routine retrieves the number of
- * implementation-defined properties available for an `omp_interop_t` object.
- */
-int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t);
-/*!
- * The `omp_get_interop_int` routine retrieves an integer property from an
- * `omp_interop_t` object.
- */
-omp_intptr_t __KAI_KMPC_CONVENTION
-omp_get_interop_int(const omp_interop_t, omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_ptr` routine retrieves a pointer property from an
- * `omp_interop_t` object.
- */
-void *__KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t,
- omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_str` routine retrieves a string property from an
- * `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION
-omp_get_interop_str(const omp_interop_t, omp_interop_property_t, int *);
-/*!
- * The `omp_get_interop_name` routine retrieves a property name from an
- * `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t,
- omp_interop_property_t);
-/*!
- * The `omp_get_interop_type_desc` routine retrieves a description of the type
- * of a property associated with an `omp_interop_t` object.
- */
-const char *__KAI_KMPC_CONVENTION
-omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
-/*!
- * The `omp_get_interop_rc_desc` routine retrieves a description of the return
- * code associated with an `omp_interop_t` object.
- */
-extern const char *__KAI_KMPC_CONVENTION
-omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
-
-typedef enum omp_interop_backend_type_t {
- // reserve 0
- omp_interop_backend_type_cuda_1 = 1,
-} omp_interop_backend_type_t;
-
-typedef enum omp_foreign_runtime_ids {
- cuda = 1,
- cuda_driver = 2,
- opencl = 3,
- sycl = 4,
- hip = 5,
- level_zero = 6,
-} omp_foreign_runtime_ids_t;
-
-///} InteropAPI
-
} // extern "C"
#endif // OMPTARGET_OPENMP_OMP_H
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.h b/openmp/libomptarget/include/Shared/PluginAPI.h
index ecf669c774f142..c80b9d1693c10e 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.h
+++ b/openmp/libomptarget/include/Shared/PluginAPI.h
@@ -17,6 +17,7 @@
#include <cstddef>
#include <cstdint>
+#include "OpenMP/InteropAPI.h"
#include "Shared/APITypes.h"
extern "C" {
@@ -165,6 +166,9 @@ void __tgt_rtl_set_info_flag(uint32_t);
// Print the device information
void __tgt_rtl_print_device_info(int32_t ID);
+// Set the runtime related information for interop object
+int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr);
+
// Event related interfaces. It is expected to use the interfaces in the
// following way:
// 1) Create an event on the target device (__tgt_rtl_create_event).
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.inc b/openmp/libomptarget/include/Shared/PluginAPI.inc
index e445da6852f7b4..c11341d969a6db 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.inc
+++ b/openmp/libomptarget/include/Shared/PluginAPI.inc
@@ -35,6 +35,7 @@ PLUGIN_API_HANDLE(synchronize);
PLUGIN_API_HANDLE(query_async);
PLUGIN_API_HANDLE(set_info_flag);
PLUGIN_API_HANDLE(print_device_info);
+PLUGIN_API_HANDLE(set_interop_info);
PLUGIN_API_HANDLE(create_event);
PLUGIN_API_HANDLE(record_event);
PLUGIN_API_HANDLE(wait_event);
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index a0fdde951b74a7..654cce3bb39c04 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2772,6 +2772,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
bool useMultipleSdmaEngines() const { return OMPX_UseMultipleSdmaEngines; }
+ virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override {
+ InterOpPtr->vendor_id = amdhsa;
+ InterOpPtr->backend_type_id = omp_interop_backend_type_amdhsa;
+
+ __tgt_device_info *DevInfo = &InterOpPtr->device_info;
+ DevInfo->Context = nullptr;
+ DevInfo->Device = &Agent;
+
+ return Plugin::success();
+ }
+
private:
using AMDGPUEventRef = AMDGPUResourceRef<AMDGPUEventTy>;
using AMDGPUEventManagerTy = GenericDeviceResourceManagerTy<AMDGPUEventRef>;
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 79e8464bfda5c1..84159920a57309 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -19,6 +19,7 @@
#include <shared_mutex>
#include <vector>
+#include "OpenMP/InteropAPI.h"
#include "Shared/Debug.h"
#include "Shared/Environment.h"
#include "Shared/EnvironmentVar.h"
@@ -850,6 +851,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
return 0;
}
+ virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) {
+ return Error::success();
+ }
+
virtual Error getDeviceStackSize(uint64_t &V) = 0;
/// Returns true if current plugin architecture is an APU
@@ -1059,7 +1064,6 @@ struct GenericPluginTy {
/// we could not move this function into GenericDeviceTy.
virtual Expected<bool> isELFCompatible(StringRef Image) const = 0;
-protected:
/// Indicate whether a device id is valid.
bool isValidDeviceId(int32_t DeviceId) const {
return (DeviceId >= 0 && DeviceId < getNumDevices());
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index b5f3c45c835fdb..febcd8ecb756c6 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -2040,6 +2040,21 @@ int32_t __tgt_rtl_init_plugin() {
return OFFLOAD_SUCCESS;
}
+int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InterOpPtr) {
+ assert(InterOpPtr && "Interop object is allocated");
+ int32_t DevId = InterOpPtr->device_id;
+
+ assert(PluginTy::get().isValidDeviceId(DevId) && "Device Id is valid");
+ if (auto Err = PluginTy::get().getDevice(DevId).setInteropInfo(InterOpPtr)) {
+ REPORT("Failure to determine the OpenMP interop object info for Device Id "
+ "%i\n",
+ DevId);
+ return OFFLOAD_FAIL;
+ }
+
+ return OFFLOAD_SUCCESS;
+}
+
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
if (!PluginTy::isActive())
return false;
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index fc74c6aa23fddd..ad074a88035b5c 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -1143,6 +1143,17 @@ struct CUDADeviceTy : public GenericDeviceTy {
/// Returns the clock frequency for the given NVPTX device.
uint64_t getClockFrequency() const override { return 1000000000; }
+ virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override {
+ InterOpPtr->vendor_id = cuda;
+ InterOpPtr->backend_type_id = omp_interop_backend_type_cuda;
+
+ __tgt_device_info *DevInfo = &InterOpPtr->device_info;
+ DevInfo->Context = Context;
+ DevInfo->Device = Device;
+
+ return Plugin::success();
+ }
+
private:
using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;
using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;
diff --git a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
index 1a995cde7816e1..1db2addb25119c 100644
--- a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
+++ b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp
@@ -70,8 +70,21 @@ const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) {
return ("hip");
case level_zero:
return ("level_zero");
+ case amdhsa:
+ return ("amdhsa");
+ default:
+ return ("unknown");
+ }
+}
+
+const char *getBackendIdToStr(intptr_t BackendId) {
+ switch (BackendId) {
+ case omp_interop_backend_type_cuda:
+ return "cuda backend";
+ case omp_interop_backend_type_amdhsa:
+ return "amdhsa backend";
}
- return ("unknown");
+ return "unknown backend";
}
template <typename PropertyTy>
@@ -105,6 +118,8 @@ const char *getProperty<const char *>(omp_interop_val_t &InteropVal,
: "device+context";
case omp_ipr_vendor_name:
return getVendorIdToStr(InteropVal.vendor_id);
+ case omp_ipr_fr_name:
+ return getBackendIdToStr(InteropVal.backend_type_id);
default:
getTypeMismatch(Property, Err);
return nullptr;
@@ -221,8 +236,11 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
NoaliasDepList);
}
- InteropPtr = new omp_interop_val_t(DeviceId, InteropType);
+ // Create interop value object
+ InteropPtr = new omp_interop_val_t(DeviceId, InteropType, invalid,
+ omp_interop_backend_type_invalid);
+ // Get an intitialized and ready device, or error
auto DeviceOrErr = PM->getDevice(DeviceId);
if (!DeviceOrErr) {
InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError());
@@ -230,12 +248,15 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
}
DeviceTy &Device = *DeviceOrErr;
- if (!Device.RTL || !Device.RTL->init_device_info ||
- Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info,
- &(InteropPtr)->err_str)) {
+ if (!Device.RTL || !Device.RTL->set_interop_info) {
delete InteropPtr;
InteropPtr = omp_interop_none;
+ return;
}
+
+ // Retrieve the target specific interop value object
+ Device.RTL->set_interop_info(InteropPtr);
+
if (InteropType == kmp_interop_type_tasksync) {
if (!Device.RTL || !Device.RTL->init_async_info ||
Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) {
diff --git a/openmp/libomptarget/test/api/omp_interop_amdgpu.c b/openmp/libomptarget/test/api/omp_interop_amdgpu.c
new file mode 100644
index 00000000000000..c66df93d44dc94
--- /dev/null
+++ b/openmp/libomptarget/test/api/omp_interop_amdgpu.c
@@ -0,0 +1,92 @@
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1
+// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
+// REQUIRES: amdgcn-amd-amdhsa
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define N 16384
+
+void vectorSet(int n, double s, double *x) {
+ for (int i = 0; i < n; ++i)
+ x[i] = s * (i + 1);
+}
+
+void vectorCopy(int n, double *x, double *y) {
+ for (int i = 0; i < n; ++i)
+ y[i] = x[i];
+}
+
+void vectorScale(int n, double s, double *x) {
+ for (int i = 0; i < n; ++i)
+ x[i] = s * x[i];
+}
+
+int main() {
+ const double ScaleFactor = 2.0;
+ double x[N], y[N];
+ omp_interop_t SyncObj = omp_interop_none;
+ int DeviceNum = omp_get_default_device();
+
+ // clang-format off
+ #pragma omp target nowait depend(out : x [0:N]) \
+ map(from : x [0:N]) device(DeviceNum)
+ // clang-format on
+ vectorSet(N, 1.0, x);
+
+#pragma omp task depend(out : y [0:N])
+ vectorSet(N, -1.0, y);
+
+ // Get SyncObject for synchronization
+ // clang-format off
+ #pragma omp interop init(targetsync : SyncObj) device(DeviceNum) \
+ depend(in : x [0:N]) depend(inout : y [0:N])
+ // clang-format on
+
+ int ForeignContextId = (int)omp_get_interop_int(SyncObj, omp_ipr_fr_id, NULL);
+ char *ForeignContextName =
+ (char *)omp_get_interop_str(SyncObj, omp_ipr_fr_name, NULL);
+
+ if (SyncObj != omp_interop_none && ForeignContextId == omp_ifr_amdhsa) {
+ printf("OpenMP working with %s runtime to execute async memcpy.\n",
+ ForeignContextName);
+ int Status;
+ omp_get_interop_ptr(SyncObj, omp_ipr_targetsync, &Status);
+
+ if (Status != omp_irc_success) {
+ fprintf(stderr, "ERROR: Failed to get %s stream, rt error = %d.\n",
+ ForeignContextName, Status);
+ if (Status == omp_irc_no_value)
+ fprintf(stderr, "Parameters valid, but no meaningful value available.");
+ exit(1);
+ }
+
+ vectorCopy(N, x, y);
+ } else {
+ // Execute as OpenMP offload
+ printf("Notice: Offloading myCopy to perform memcpy.\n");
+ // clang-format off
+ #pragma omp target depend(in : x [0:N]) depend(inout : y [0:N]) nowait \
+ map(to : x [0:N]) map(tofrom : y [0:N]) device(DeviceNum)
+ // clang-format on
+ vectorCopy(N, x, y);
+ }
+
+ // This also ensures foreign tasks complete
+#pragma omp interop destroy(SyncObj) nowait depend(out : y [0:N])
+
+#pragma omp target depend(inout : x [0:N])
+ vectorScale(N, ScaleFactor, x);
+
+#pragma omp taskwait
+
+ printf("(1 : 16384) %f:%f\n", y[0], y[N - 1]);
+ printf("(2 : 32768) %f:%f\n", x[0], x[N - 1]);
+
+ return 0;
+}
+
+// ToDo: Add meaningful checks; the following is a placeholder.
+
+// CHECK: OpenMP working with amdhsa backend runtime to execute async memcpy
diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index eb3ab7778606a3..7c60764e447163 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -192,7 +192,8 @@
omp_ifr_sycl = 4,
omp_ifr_hip = 5,
omp_ifr_level_zero = 6,
- omp_ifr_last = 7
+ omp_ifr_amdhsa = 7,
+ omp_ifr_last = 8
} omp_interop_fr_t;
typedef void * omp_interop_t;
diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var
index a709a2f298f8c8..a076890b1207b2 100644
--- a/openmp/runtime/src/include/omp_lib.h.var
+++ b/openmp/runtime/src/include/omp_lib.h.var
@@ -261,8 +261,10 @@
parameter(omp_ifr_hip=5)
integer(kind=omp_interop_fr_kind)omp_ifr_level_zero
parameter(omp_ifr_level_zero=6)
+ integer(kind=omp_interop_fr_kind)omp_ifr_amdhsa
+ parameter(omp_ifr_amdhsa=7)
integer(kind=omp_interop_fr_kind)omp_ifr_last
- parameter(omp_ifr_last=7)
+ parameter(omp_ifr_last=8)
integer(kind=omp_interop_kind)omp_interop_none
parameter(omp_interop_none=0)
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index 713561734c481a..6092799d03d6ef 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -1551,7 +1551,8 @@ typedef enum omp_interop_fr {
omp_ifr_sycl = 4,
omp_ifr_hip = 5,
omp_ifr_level_zero = 6,
- omp_ifr_last = 7
+ omp_ifr_amdhsa = 7,
+ omp_ifr_last = 8
} omp_interop_fr_t;
typedef void *omp_interop_t;
More information about the Openmp-commits
mailing list