[Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491)
Alex Duran via Openmp-commits
openmp-commits at lists.llvm.org
Wed Jul 2 05:13:53 PDT 2025
https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491
>From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Tue, 10 Jun 2025 10:39:29 +0200
Subject: [PATCH 1/3] [OFFLOAD][OPENMP] 6.0 compatible interop interface
The following patch introduces a new interop interface implementation with
the following characteristics:
* It supports the new 6.0 prefer_type specification
* It supports both explicit objects (from interop constructs) and
implicit objects (from variant calls).
* Implements a per-thread reuse mechanism for implicit objects to reduce
overheads.
* It provides a plugin interface that allows selecting the supported
interop types, and managing all the backend related interop operations
(init, sync, ...).
* It enables cooperation with the OpenMP runtime to allow
progress on OpenMP synchronizations.
* It cleans up some vendor/fr_id mismatches in the current query
routines.
* It supports extension to define interop callbacks for library cleanup.
---
offload/include/OpenMP/InteropAPI.h | 149 ++++++-
offload/include/OpenMP/omp.h | 51 +--
offload/include/PerThreadTable.h | 109 +++++
offload/include/PluginManager.h | 7 +-
offload/include/Shared/APITypes.h | 1 +
offload/libomptarget/OffloadRTL.cpp | 6 +
offload/libomptarget/OpenMP/API.cpp | 12 +
offload/libomptarget/OpenMP/InteropAPI.cpp | 371 ++++++++++++------
offload/libomptarget/PluginManager.cpp | 6 +
offload/libomptarget/exports | 5 +-
.../common/include/PluginInterface.h | 55 +++
openmp/runtime/src/kmp.h | 7 +
openmp/runtime/src/kmp_barrier.cpp | 8 +
openmp/runtime/src/kmp_runtime.cpp | 15 +
openmp/runtime/src/kmp_tasking.cpp | 29 ++
15 files changed, 688 insertions(+), 143 deletions(-)
create mode 100644 offload/include/PerThreadTable.h
diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h
index 71c78760a3226..61cbedf06a9a6 100644
--- a/offload/include/OpenMP/InteropAPI.h
+++ b/offload/include/OpenMP/InteropAPI.h
@@ -13,17 +13,70 @@
#include "omp.h"
+#include "PerThreadTable.h"
#include "omptarget.h"
extern "C" {
typedef enum kmp_interop_type_t {
kmp_interop_type_unknown = -1,
- kmp_interop_type_platform,
- kmp_interop_type_device,
- kmp_interop_type_tasksync,
+ kmp_interop_type_target,
+ kmp_interop_type_targetsync,
} kmp_interop_type_t;
+struct interop_attrs_t {
+ bool inorder : 1; // the only attribute checked by checkSupportedOnly()
+ int reserved : 31; // reserved bits, not read in this struct
+
+ /* Check that the attributes set in this object are all supported.
+ `supported` holds the attributes that are available: an attribute may
+ only be true here if it is also true in `supported`, otherwise it must
+ be false. With the single `inorder` attribute the expression below is
+ equivalent to: supported.inorder || !inorder.
+ */
+ bool checkSupportedOnly(interop_attrs_t supported) const {
+ return supported.inorder || (!supported.inorder && !inorder);
+ }
+};
+
+struct interop_spec_t {
+ int32_t fr_id;
+ interop_attrs_t attrs; // Common attributes
+ int64_t impl_attrs; // Implementation specific attributes (recognized by each
+ // plugin)
+};
+
+struct interop_flags_t {
+ bool implicit : 1; // dispatch (true) or interop (false)
+ bool nowait : 1; // has nowait flag
+ int reserved : 30;
+};
+
+struct interop_ctx_t {
+ uint16_t version; // version of the interface (current is 0)
+ interop_flags_t flags;
+ int gtid;
+};
+
+struct dep_pack_t { // dependence lists the interop entry points forward to __kmpc_omp_wait_deps
+ int32_t ndeps; // presumably the number of entries in deplist — TODO confirm
+ kmp_depend_info_t *deplist;
+ int32_t ndeps_noalias; // presumably the number of entries in noalias_deplist — TODO confirm
+ kmp_depend_info_t *noalias_deplist;
+};
+
+struct omp_interop_val_t;
+
+typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data);
+
+struct omp_interop_cb_instance_t { // a callback bundled with the user data it is invoked with
+ ompx_interop_cb_t *cb; // function to call
+ void *data; // passed back unchanged as the callback's second argument
+
+ omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data)
+ : cb(cb), data(data) {}
+
+ void operator()(omp_interop_val_t *interop) { cb(interop, data); } // no null check on cb
+};
+
/// The interop value type, aka. the interop object.
typedef struct omp_interop_val_t {
/// Device and interop-type are determined at construction time and fix.
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t {
__tgt_device_info device_info;
const kmp_interop_type_t interop_type;
const intptr_t device_id;
- const omp_foreign_runtime_ids_t vendor_id = cuda;
- const intptr_t backend_type_id = omp_interop_backend_type_cuda_1;
+ omp_vendor_id_t vendor_id = omp_vendor_llvm;
+ omp_foreign_runtime_id_t fr_id = omp_fr_none;
+ interop_attrs_t attrs{false, 0}; // Common prefer specification attributes
+ int64_t impl_attrs = 0; // Implementation prefer specification attributes
+
+ void *RTLProperty = nullptr; // Plugin dependent information
+ // For implicitly created interop objects (e.g., from a dispatch construct),
+ // the gtid of the thread that owns the object; -1 means no owner.
+ int OwnerGtid = -1;
+ // True while the object has not been requested again since it was last synced
+ bool Clean = true;
+
+ typedef llvm::SmallVector<omp_interop_cb_instance_t> callback_list_t;
+
+ callback_list_t CompletionCbs;
+
+ void reset() {
+ OwnerGtid = -1;
+ markClean();
+ clearCompletionCbs();
+ }
+
+ bool hasOwner() const { return OwnerGtid != -1; }
+
+ void setOwner(int gtid) { OwnerGtid = gtid; }
+ bool isOwnedBy(int gtid) { return OwnerGtid == gtid; }
+ bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec);
+ bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec,
+ int64_t DeviceNum, int gtid);
+ void markClean() { Clean = true; }
+ void markDirty() { Clean = false; }
+ bool isClean() const { return Clean; }
+
+ int32_t flush(DeviceTy &Device);
+ int32_t sync_barrier(DeviceTy &Device);
+ int32_t async_barrier(DeviceTy &Device);
+ int32_t release(DeviceTy &Device);
+
+ int32_t flush();
+ int32_t syncBarrier();
+ int32_t asyncBarrier();
+ int32_t release();
+
+ void addCompletionCb(ompx_interop_cb_t *cb, void *data) {
+ CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data));
+ }
+
+ int numCompletionCbs() const { return CompletionCbs.size(); }
+ void clearCompletionCbs() { CompletionCbs.clear(); }
+
+ void runCompletionCbs() {
+ for (auto &cbInstance : CompletionCbs)
+ cbInstance(this);
+ clearCompletionCbs();
+ }
} omp_interop_val_t;
} // extern "C"
+// Per-thread container of interop object pointers. InteropTblTy uses it as the
+// ContainerType of PerThreadTable, which calls add(), erase(), begin(), end()
+// and clear() on it.
+struct InteropTableEntry {
+ using ContainerTy = typename std::vector<omp_interop_val_t *>;
+ using iterator = typename ContainerTy::iterator;
+
+ ContainerTy Interops;
+
+ const int reservedEntriesPerThread =
+ 20; // reserve some entries to avoid reallocation
+
+ // Append obj; the first insertion reserves reservedEntriesPerThread slots.
+ void add(omp_interop_val_t *obj) {
+ if (Interops.capacity() == 0)
+ Interops.reserve(reservedEntriesPerThread);
+ Interops.push_back(obj);
+ }
+
+ // Invoke f on every stored object and then forget all of them.
+ template <class ClearFuncTy> void clear(ClearFuncTy f) {
+ for (auto &Obj : Interops) {
+ f(Obj);
+ }
+ // Drop the entries once the callback has run: f may have released the
+ // objects, and keeping the pointers would leave begin()/end()/size()
+ // exposing them after the table was cleared.
+ Interops.clear();
+ }
+
+ /* vector interface */
+ int size() const { return Interops.size(); }
+ iterator begin() { return Interops.begin(); }
+ iterator end() { return Interops.end(); }
+ iterator erase(iterator it) { return Interops.erase(it); }
+};
+
+struct InteropTblTy
+ : public PerThreadTable<InteropTableEntry, omp_interop_val_t *> {
+ void clear();
+};
+
#endif // OMPTARGET_OPENMP_INTEROP_API_H
diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h
index b44c6aff1b289..67b3bab9e8599 100644
--- a/offload/include/OpenMP/omp.h
+++ b/offload/include/OpenMP/omp.h
@@ -80,15 +80,18 @@ typedef enum omp_interop_rc {
omp_irc_other = -6
} omp_interop_rc_t;
-typedef enum omp_interop_fr {
- omp_ifr_cuda = 1,
- omp_ifr_cuda_driver = 2,
- omp_ifr_opencl = 3,
- omp_ifr_sycl = 4,
- omp_ifr_hip = 5,
- omp_ifr_level_zero = 6,
- omp_ifr_last = 7
-} omp_interop_fr_t;
+/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */
+typedef enum omp_foreign_runtime_id_t {
+ omp_fr_none = 0,
+ omp_fr_cuda = 1,
+ omp_fr_cuda_driver = 2,
+ omp_fr_opencl = 3,
+ omp_fr_sycl = 4,
+ omp_fr_hip = 5,
+ omp_fr_level_zero = 6,
+ omp_fr_hsa = 7,
+ omp_fr_last = 8
+} omp_foreign_runtime_id_t;
typedef void *omp_interop_t;
@@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
extern const char *__KAI_KMPC_CONVENTION
omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
-typedef enum omp_interop_backend_type_t {
- // reserve 0
- omp_interop_backend_type_cuda_1 = 1,
-} omp_interop_backend_type_t;
-
-typedef enum omp_foreign_runtime_ids {
- cuda = 1,
- cuda_driver = 2,
- opencl = 3,
- sycl = 4,
- hip = 5,
- level_zero = 6,
-} omp_foreign_runtime_ids_t;
+/* Vendor defined values from OpenMP Additional Definitions document v2.1*/
+typedef enum omp_vendor_id {
+ omp_vendor_unknown = 0,
+ omp_vendor_amd = 1,
+ omp_vendor_arm = 2,
+ omp_vendor_bsc = 3,
+ omp_vendor_fujitsu = 4,
+ omp_vendor_gnu = 5,
+ omp_vendor_hpe = 6,
+ omp_vendor_ibm = 7,
+ omp_vendor_intel = 8,
+ omp_vendor_llvm = 9,
+ omp_vendor_nec = 10,
+ omp_vendor_nvidia = 11,
+ omp_vendor_ti = 12,
+ omp_vendor_last = 13
+} omp_vendor_id_t;
///} InteropAPI
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
new file mode 100644
index 0000000000000..1e20b56c734d2
--- /dev/null
+++ b/offload/include/PerThreadTable.h
@@ -0,0 +1,109 @@
+//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Table indexed with one entry per thread.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OFFLOAD_PERTHREADTABLE_H
+#define OFFLOAD_PERTHREADTABLE_H
+
+#include <list>
+#include <memory>
+#include <mutex>
+
+// Using an STL container (such as std::vector) indexed by thread ID is prone
+// to race conditions, so each thread's entry is stored in a thread_local
+// variable instead.
+// ContainerType is the container each thread uses to store its objects (e.g.,
+// std::vector, std::set, etc.). ObjectType is the type of the stored objects,
+// e.g., omp_interop_val_t *.
+
+template <typename ContainerType, typename ObjectType> struct PerThreadTable {
+ using iterator = typename ContainerType::iterator;
+
+ struct PerThreadData {
+ size_t NElements = 0; // incremented by add(), decremented by erase(), zeroed by clear()
+ std::unique_ptr<ContainerType> ThEntry; // allocated lazily by getThreadEntry()
+ };
+
+ std::mutex Mtx; // guards ThreadDataList
+ std::list<PerThreadData *> ThreadDataList; // one pointer per thread that created its entry; NOTE(review): these point at thread_local storage — confirm the threads outlive clear()
+
+ // Default-constructible only; copy and move (construction and assignment) are disabled
+ PerThreadTable() = default;
+ PerThreadTable(const PerThreadTable &) = delete;
+ PerThreadTable(PerThreadTable &&) = delete;
+ PerThreadTable &operator=(const PerThreadTable &) = delete;
+ PerThreadTable &operator=(PerThreadTable &&) = delete;
+ ~PerThreadTable() {
+ std::lock_guard<std::mutex> Lock(Mtx);
+ ThreadDataList.clear(); // only forgets the registrations; no per-object callback runs here
+ }
+
+private:
+ PerThreadData &getThreadData() {
+ static thread_local PerThreadData ThData; // one instance per thread for each PerThreadTable specialization
+ return ThData;
+ }
+
+protected:
+ ContainerType &getThreadEntry() {
+ auto &ThData = getThreadData();
+ if (ThData.ThEntry)
+ return *ThData.ThEntry;
+ ThData.ThEntry = std::make_unique<ContainerType>(); // first use by this thread: create the container...
+ std::lock_guard<std::mutex> Lock(Mtx);
+ ThreadDataList.push_back(&ThData); // ...and register it so clear() can reach it
+ return *ThData.ThEntry;
+ }
+
+ size_t &getThreadNElements() {
+ auto &ThData = getThreadData();
+ return ThData.NElements;
+ }
+
+public:
+ void add(ObjectType obj) {
+ auto &Entry = getThreadEntry();
+ auto &NElements = getThreadNElements();
+ NElements++;
+ Entry.add(obj); // ContainerType must provide add()
+ }
+
+ iterator erase(iterator it) {
+ auto &Entry = getThreadEntry();
+ auto &NElements = getThreadNElements();
+ NElements--; // no check that it belongs to this thread's container or that the count is non-zero
+ return Entry.erase(it);
+ }
+
+ size_t size() { return getThreadNElements(); } // count for the calling thread only
+
+ // Iterators to traverse objects owned by
+ // the current thread
+ iterator begin() {
+ auto &Entry = getThreadEntry();
+ return Entry.begin();
+ }
+ iterator end() {
+ auto &Entry = getThreadEntry();
+ return Entry.end();
+ }
+
+ template <class F> void clear(F f) {
+ std::lock_guard<std::mutex> Lock(Mtx);
+ for (auto ThData : ThreadDataList) { // visits every registered thread, not just the caller
+ ThData->ThEntry->clear(f); // ContainerType::clear(f) decides what is done with each object
+ ThData->NElements = 0;
+ }
+ ThreadDataList.clear(); // NOTE(review): ThEntry stays allocated, so getThreadEntry() will not re-register these threads — confirm this is intended
+ }
+};
+
+#endif
diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h
index ec3adadf0819b..ea1f3b6406ce7 100644
--- a/offload/include/PluginManager.h
+++ b/offload/include/PluginManager.h
@@ -35,6 +35,8 @@
#include <mutex>
#include <string>
+#include "OpenMP/InteropAPI.h"
+
using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy;
/// Struct for the data required to handle plugins
@@ -88,6 +90,9 @@ struct PluginManager {
HostPtrToTableMapTy HostPtrToTableMap;
std::mutex TblMapMtx; ///< For HostPtrToTableMap
+ /// Table of cached implicit interop objects
+ InteropTblTy InteropTbl;
+
// Work around for plugins that call dlopen on shared libraries that call
// tgt_register_lib during their initialisation. Stash the pointers in a
// vector until the plugins are all initialised and then register them.
@@ -185,5 +190,5 @@ void initRuntime();
void deinitRuntime();
extern PluginManager *PM;
-
+extern std::atomic<bool> RTLAlive; // Indicates if the RTL has been initialized
#endif // OMPTARGET_PLUGIN_MANAGER_H
diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h
index 978b53d5d69b9..f376c7dc861f9 100644
--- a/offload/include/Shared/APITypes.h
+++ b/offload/include/Shared/APITypes.h
@@ -36,6 +36,7 @@ struct __tgt_device_image {
struct __tgt_device_info {
void *Context = nullptr;
void *Device = nullptr;
+ void *Platform = nullptr;
};
/// This struct is a record of all the host code that may be offloaded to a
diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp
index 29b573a27d087..134ab7c95ac0b 100644
--- a/offload/libomptarget/OffloadRTL.cpp
+++ b/offload/libomptarget/OffloadRTL.cpp
@@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary();
static std::mutex PluginMtx;
static uint32_t RefCount = 0;
+std::atomic<bool> RTLAlive{false};
void initRuntime() {
std::scoped_lock<decltype(PluginMtx)> Lock(PluginMtx);
@@ -41,6 +42,9 @@ void initRuntime() {
PM->init();
PM->registerDelayedLibraries();
+
+ // RTL initialization is complete
+ RTLAlive = true;
}
}
@@ -50,6 +54,8 @@ void deinitRuntime() {
if (RefCount == 1) {
DP("Deinit offload library!\n");
+ // RTL deinitialization has started
+ RTLAlive = false;
PM->deinit();
delete PM;
PM = nullptr;
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index 4576f9bd06121..f61f56772504b 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) {
return TPR.TargetPointer;
}
+
+void syncImplicitInterops(int gtid, void *event);
+// Called by the host OpenMP runtime at synchronization points (taskwait,
+// barrier, ...) so the offload side can synchronize the objects it manages.
+EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task,
+ void *event) { // loc_ref and current_task are not used in this body
+
+ if (!RTLAlive)
+ return; // RTLAlive is false before initRuntime() completes and once deinitRuntime() has started
+
+ syncImplicitInterops(gtid, event);
+}
diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index bdbc440c64a2c..55e47d87a865d 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -10,6 +10,7 @@
#include "OpenMP/InternalTypes.h"
#include "OpenMP/omp.h"
+#include "OffloadPolicy.h"
#include "PluginManager.h"
#include "device.h"
#include "omptarget.h"
@@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) {
*Err = getPropertyErrorType(Property);
}
-const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) {
- switch (VendorId) {
- case cuda:
- return ("cuda");
- case cuda_driver:
- return ("cuda_driver");
- case opencl:
- return ("opencl");
- case sycl:
- return ("sycl");
- case hip:
- return ("hip");
- case level_zero:
- return ("level_zero");
- }
- return ("unknown");
+static const char *VendorStrTbl[] = { // indexed by omp_vendor_id_t value
+ "unknown", "amd", "arm", "bsc", "fujitsu", "gnu", "hpe",
+ "ibm", "intel", "llvm", "nec", "nvidia", "ti"};
+const char *getVendorIdToStr(const omp_vendor_id_t VendorId) {
+ if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last) // out-of-range ids must not index the table
+ return ("unknown");
+ return VendorStrTbl[VendorId];
+}
+
+static const char *ForeignRuntimeStrTbl[] = { // indexed by omp_foreign_runtime_id_t value
+ "none", "cuda", "cuda_driver", "opencl",
+ "sycl", "hip", "level_zero", "hsa"};
+const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) {
+ if (FrId < omp_fr_none || FrId >= omp_fr_last) // out-of-range ids must not index the table
+ return ("unknown");
+ return ForeignRuntimeStrTbl[FrId];
}
template <typename PropertyTy>
@@ -83,7 +84,7 @@ intptr_t getProperty<intptr_t>(omp_interop_val_t &InteropVal,
omp_interop_property_t Property, int *Err) {
switch (Property) {
case omp_ipr_fr_id:
- return InteropVal.backend_type_id;
+ return InteropVal.fr_id;
case omp_ipr_vendor:
return InteropVal.vendor_id;
case omp_ipr_device_num:
@@ -99,10 +100,8 @@ const char *getProperty<const char *>(omp_interop_val_t &InteropVal,
omp_interop_property_t Property,
int *Err) {
switch (Property) {
- case omp_ipr_fr_id:
- return InteropVal.interop_type == kmp_interop_type_tasksync
- ? "tasksync"
- : "device+context";
+ case omp_ipr_fr_name:
+ return getForeignRuntimeIdToStr(InteropVal.fr_id);
case omp_ipr_vendor_name:
return getVendorIdToStr(InteropVal.vendor_id);
default:
@@ -120,6 +119,8 @@ void *getProperty<void *>(omp_interop_val_t &InteropVal,
return InteropVal.device_info.Device;
*Err = omp_irc_no_value;
return const_cast<char *>(InteropVal.err_str);
+ case omp_ipr_platform:
+ return InteropVal.device_info.Platform;
case omp_ipr_device_context:
return InteropVal.device_info.Context;
case omp_ipr_targetsync:
@@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr,
return false;
}
if (Property == omp_ipr_targetsync &&
- (*InteropPtr)->interop_type != kmp_interop_type_tasksync) {
+ (*InteropPtr)->interop_type != kmp_interop_type_targetsync) {
if (Err)
*Err = omp_irc_other;
return false;
}
if ((Property == omp_ipr_device || Property == omp_ipr_device_context) &&
- (*InteropPtr)->interop_type == kmp_interop_type_tasksync) {
+ (*InteropPtr)->interop_type == kmp_interop_type_targetsync) {
if (Err)
*Err = omp_irc_other;
return false;
@@ -166,7 +167,7 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr,
omp_interop_property_t property_id, \
int *err) { \
omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \
- assert((interop_val)->interop_type == kmp_interop_type_tasksync); \
+ assert((interop_val)->interop_type == kmp_interop_type_targetsync); \
if (!getPropertyCheck(&interop_val, property_id, err)) { \
return (RETURN_TYPE)(0); \
} \
@@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc)
__OMP_GET_INTEROP_TY3(const char *, rc_desc)
#undef __OMP_GET_INTEROP_TY3
-static const char *copyErrorString(llvm::Error &&Err) {
- // TODO: Use the error string while avoiding leaks.
- std::string ErrMsg = llvm::toString(std::move(Err));
- char *UsrMsg = reinterpret_cast<char *>(malloc(ErrMsg.size() + 1));
- strcpy(UsrMsg, ErrMsg.c_str());
- return UsrMsg;
-}
-
extern "C" {
-void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
- omp_interop_val_t *&InteropPtr,
- kmp_interop_type_t InteropType, int32_t DeviceId,
- int32_t Ndeps, kmp_depend_info_t *DepList,
- int32_t HaveNowait) {
- int32_t NdepsNoalias = 0;
- kmp_depend_info_t *NoaliasDepList = NULL;
- assert(InteropType != kmp_interop_type_unknown &&
- "Cannot initialize with unknown interop_type!");
- if (DeviceId == -1) {
- DeviceId = omp_get_default_device();
+/// Obtain an interop object for DeviceNum (the default device when DeviceNum
+/// is OFFLOAD_DEVICE_DEFAULT). The plugin selects the interop specification
+/// from the NumPrefers entries in Prefers. For implicit requests
+/// (Ctx->flags.implicit) a compatible object already cached by the calling
+/// thread is reused when available. Returns omp_interop_none when offload is
+/// disabled, the device cannot be found, the plugin does not support the
+/// request, or the plugin fails to create the object.
+omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType,
+ int64_t DeviceNum, int32_t NumPrefers,
+ interop_spec_t *Prefers,
+ interop_ctx_t *Ctx, dep_pack_t *Deps) {
+
+ DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32
+ ", number of preferred specs %" PRId32 "%s%s\n",
+ __func__, DeviceNum, InteropType, NumPrefers,
+ Ctx->flags.implicit ? " (implicit)" : "",
+ Ctx->flags.nowait ? " (nowait)" : "");
+
+ if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED)
+ return omp_interop_none;
+
+ // Now, try to create an interop with device_num.
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT)
+ DeviceNum = omp_get_default_device();
+
+ auto gtid = Ctx->gtid;
+
+ if (InteropType == kmp_interop_type_targetsync) {
+ if (Ctx->flags.nowait)
+ DP("Warning: nowait flag on interop creation not supported yet. "
+ "Ignored\n");
+ if (Deps)
+ __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist,
+ Deps->ndeps_noalias, Deps->noalias_deplist);
}
- if (InteropType == kmp_interop_type_tasksync) {
- __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias,
- NoaliasDepList);
+ auto DeviceOrErr = PM->getDevice(DeviceNum);
+ if (!DeviceOrErr) {
+ [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError());
+ DP("Couldn't find device %" PRId64
+ " while constructing interop object: %s\n",
+ DeviceNum, ErrStr.c_str());
+ return omp_interop_none;
+ }
+ auto &Device = *DeviceOrErr;
+ omp_interop_val_t *Interop = omp_interop_none;
+ auto InteropSpec = Device.RTL->select_interop_preference(
+ DeviceNum, InteropType, NumPrefers, Prefers);
+ if (InteropSpec.fr_id == omp_fr_none) {
+ DP("Interop request not supported by device %" PRId64 "\n", DeviceNum);
+ return omp_interop_none;
+ }
+ DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n",
+ getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id),
+ InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs);
+
+ if (Ctx->flags.implicit) {
+ // This is a request for an RTL managed interop object.
+ // Get it from the InteropTbl if possible
+ if (PM->InteropTbl.size() > 0) {
+ for (auto iop : PM->InteropTbl) {
+ if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) {
+ Interop = iop;
+ Interop->markDirty();
+ DP("Reused interop " DPxMOD " from device number %" PRId64
+ " for gtid %" PRId32 "\n",
+ DPxPTR(Interop), DeviceNum, gtid);
+ return Interop;
+ }
+ }
+ }
}
- InteropPtr = new omp_interop_val_t(DeviceId, InteropType);
-
- auto DeviceOrErr = PM->getDevice(DeviceId);
- if (!DeviceOrErr) {
- InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError());
- return;
+ Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec);
+ // The plugin may fail to create the object (the default createInterop hook
+ // returns nullptr); do not dereference it in that case.
+ if (!Interop) {
+ DP("Failed to create an interop for device number %" PRId64 "\n",
+ DeviceNum);
+ return omp_interop_none;
+ }
+ DP("Created an interop " DPxMOD " from device number %" PRId64 "\n",
+ DPxPTR(Interop), DeviceNum);
+
+ if (Ctx->flags.implicit) {
+ // register the new implicit interop in the RTL
+ Interop->setOwner(gtid);
+ Interop->markDirty();
+ PM->InteropTbl.add(Interop);
+ } else {
+ Interop->setOwner(-1);
}
- DeviceTy &Device = *DeviceOrErr;
- if (!Device.RTL ||
- Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info,
- &(InteropPtr)->err_str)) {
- delete InteropPtr;
- InteropPtr = omp_interop_none;
+ return Interop;
+}
+
+/// Synchronize with the work queued on Interop. For targetsync objects the
+/// dependences in Deps (if any) are waited on first. When the object has a
+/// queue, a nowait request queues an asynchronous barrier; otherwise the queue
+/// is flushed, a synchronous barrier is waited on and the object is marked
+/// clean. Returns OFFLOAD_FAIL when offload is disabled or Interop is null,
+/// OFFLOAD_SUCCESS otherwise.
+int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop,
+ interop_ctx_t *Ctx, dep_pack_t *Deps) {
+ bool nowait = Ctx->flags.nowait;
+ DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__,
+ DPxPTR(Interop), nowait);
+ // Single guard: a second `!Interop` test after this one was unreachable.
+ if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop)
+ return OFFLOAD_FAIL;
+
+ if (Interop->interop_type == kmp_interop_type_targetsync) {
+ if (Ctx->flags.nowait)
+ DP("Warning: nowait flag on interop use not supported yet. "
+ "Ignored\n");
+ if (Deps)
+ __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist,
+ Deps->ndeps_noalias, Deps->noalias_deplist);
}
- if (InteropType == kmp_interop_type_tasksync) {
- if (!Device.RTL ||
- Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) {
- delete InteropPtr;
- InteropPtr = omp_interop_none;
+
+ if (Interop->async_info && Interop->async_info->Queue) {
+ if (nowait)
+ Interop->asyncBarrier();
+ else {
+ Interop->flush();
+ Interop->syncBarrier();
+ Interop->markClean();
}
}
+
+ return OFFLOAD_SUCCESS;
}
-void __tgt_interop_use(ident_t *LocRef, int32_t Gtid,
- omp_interop_val_t *&InteropPtr, int32_t DeviceId,
- int32_t Ndeps, kmp_depend_info_t *DepList,
- int32_t HaveNowait) {
- int32_t NdepsNoalias = 0;
- kmp_depend_info_t *NoaliasDepList = NULL;
- assert(InteropPtr && "Cannot use nullptr!");
- omp_interop_val_t *InteropVal = InteropPtr;
- if (DeviceId == -1) {
- DeviceId = omp_get_default_device();
- }
- assert(InteropVal != omp_interop_none &&
- "Cannot use uninitialized interop_ptr!");
- assert((DeviceId == -1 || InteropVal->device_id == DeviceId) &&
- "Inconsistent device-id usage!");
+/// Release Interop. For targetsync objects the dependences in Deps (if any)
+/// are waited on first. Returns OFFLOAD_FAIL when offload is disabled or
+/// Interop is null, otherwise the result of omp_interop_val_t::release().
+int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop,
+ interop_ctx_t *Ctx, dep_pack_t *Deps) {
+ DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop));
- auto DeviceOrErr = PM->getDevice(DeviceId);
- if (!DeviceOrErr) {
- InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError());
- return;
+ // Single guard: a second `!Interop` test after this one was unreachable.
+ if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop)
+ return OFFLOAD_FAIL;
+
+ if (Interop->interop_type == kmp_interop_type_targetsync) {
+ if (Ctx->flags.nowait)
+ DP("Warning: nowait flag on interop destroy not supported yet. "
+ "Ignored\n");
+ if (Deps) {
+ __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist,
+ Deps->ndeps_noalias, Deps->noalias_deplist);
+ }
}
- if (InteropVal->interop_type == kmp_interop_type_tasksync) {
- __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias,
- NoaliasDepList);
+ return Interop->release();
+}
+
+} // extern "C"
+
+bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, // true only if type, fr_id, inorder and impl_attrs all match exactly
+ const interop_spec_t &Spec) {
+ if (interop_type != InteropType)
+ return false;
+ if (Spec.fr_id != fr_id)
+ return false;
+ if (Spec.attrs.inorder != attrs.inorder) // only the inorder attribute is compared
+ return false;
+ if (Spec.impl_attrs != impl_attrs)
+ return false;
+
+ return true;
+}
+
+bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, // also requires the same device and the same owning gtid
+ const interop_spec_t &Spec,
+ int64_t DeviceNum, int GTID) {
+ if (device_id != DeviceNum)
+ return false;
+
+ if (GTID != OwnerGtid)
+ return false;
+
+ return isCompatibleWith(InteropType, Spec); // then the type/spec checks of the two-argument overload
+}
+
+int32_t omp_interop_val_t::flush(DeviceTy &Device) {
+ return Device.RTL->flush_queue(this);
+}
+
+int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) {
+ if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) {
+ FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n",
+ this);
}
- // TODO Flush the queue associated with the interop through the plugin
+ DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this));
+ runCompletionCbs();
+ return OFFLOAD_SUCCESS;
+}
+
+int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) {
+ return Device.RTL->async_barrier(this);
}
-void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid,
- omp_interop_val_t *&InteropPtr, int32_t DeviceId,
- int32_t Ndeps, kmp_depend_info_t *DepList,
- int32_t HaveNowait) {
- int32_t NdepsNoalias = 0;
- kmp_depend_info_t *NoaliasDepList = NULL;
- assert(InteropPtr && "Cannot use nullptr!");
- omp_interop_val_t *InteropVal = InteropPtr;
- if (DeviceId == -1) {
- DeviceId = omp_get_default_device();
+// Release this interop through the plugin of Device. If the object has async
+// info and is either unowned or dirty, its queue is flushed and a synchronous
+// barrier is waited on first.
+int32_t omp_interop_val_t::release(DeviceTy &Device) {
+ if (async_info != nullptr && (!hasOwner() || !isClean())) {
+ // Use the Device passed in rather than the device-less wrappers, which
+ // would each look the device up again through PM->getDevice().
+ flush(Device);
+ sync_barrier(Device);
+ }
+ return Device.RTL->release_interop(device_id, this);
+}
- if (InteropVal == omp_interop_none)
- return;
+int32_t omp_interop_val_t::flush() { // device-less overload: resolves device_id, then forwards to flush(Device)
+ auto DeviceOrErr = PM->getDevice(device_id);
+ if (!DeviceOrErr)
+ FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str());
+ DeviceTy &Device = *DeviceOrErr;
+ return flush(Device);
+}
- assert((DeviceId == -1 || InteropVal->device_id == DeviceId) &&
- "Inconsistent device-id usage!");
- auto DeviceOrErr = PM->getDevice(DeviceId);
- if (!DeviceOrErr) {
- InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError());
+int32_t omp_interop_val_t::syncBarrier() { // resolves device_id, then forwards to sync_barrier(Device)
+ auto DeviceOrErr = PM->getDevice(device_id);
+ if (!DeviceOrErr)
+ FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str());
+ DeviceTy &Device = *DeviceOrErr;
+ return sync_barrier(Device);
+}
+
+int32_t omp_interop_val_t::asyncBarrier() { // resolves device_id, then forwards to async_barrier(Device)
+ auto DeviceOrErr = PM->getDevice(device_id);
+ if (!DeviceOrErr)
+ FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str());
+ DeviceTy &Device = *DeviceOrErr;
+ return async_barrier(Device);
+}
+
+int32_t omp_interop_val_t::release() { // device-less overload: resolves device_id, then forwards to release(Device)
+ auto DeviceOrErr = PM->getDevice(device_id);
+ if (!DeviceOrErr)
+ FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str());
+ DeviceTy &Device = *DeviceOrErr;
+ return release(Device);
+}
+
+void syncImplicitInterops(int gtid, void *event) {
+ if (PM->InteropTbl.size() == 0)
return;
- }
- if (InteropVal->interop_type == kmp_interop_type_tasksync) {
- __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias,
- NoaliasDepList);
+ DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n",
+ gtid, DPxPTR(event));
+
+ for (auto iop : PM->InteropTbl) {
+ if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) &&
+ !iop->isClean()) {
+
+ iop->flush();
+ iop->syncBarrier();
+ iop->markClean();
+
+ // TODO: Alternate implementation option
+ // Instead of using a synchronous barrier, queue an asynchronous
+ // barrier and create a proxy task associated to the event to handle
+ // OpenMP synchronizations.
+ // When the event is completed, fulfill the proxy task to notify the
+ // OpenMP runtime.
+ // event = iop->asyncBarrier();
+ // ptask = createProxyTask();
+ // Events->add(event,ptask);
+ }
}
- // TODO Flush the queue associated with the interop through the plugin
- // TODO Signal out dependences
-
- delete InteropPtr;
- InteropPtr = omp_interop_none;
+ // This would be needed for the alternate implementation
+ // processEvents();
}
-} // extern "C"
+void InteropTblTy::clear() {
+ DP("Clearing Interop Table\n");
+ PerThreadTable::clear([](auto &IOP) { IOP->release(); }); // release() is called on every interop handed to the callback; its return value is ignored
+}
diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp
index 93589960a426d..2cc1314e7a4f0 100644
--- a/offload/libomptarget/PluginManager.cpp
+++ b/offload/libomptarget/PluginManager.cpp
@@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() {
initializeDevice(Plugin, DeviceId);
}
}
+ // After all plugins are initialized, register atExit cleanup handlers
+ std::atexit([]() {
+ // Interop cleanup should be done before the plugins are deinitialized as
+ // the backend libraries may be already unloaded.
+ PM->InteropTbl.clear();
+ });
}
// Returns a pointer to the binary descriptor, upgrading from a legacy format if
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 2406776c1fb5f..b40d9b22a1be9 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -67,9 +67,10 @@ VERS1.0 {
omp_get_interop_int;
omp_get_interop_name;
omp_get_interop_type_desc;
- __tgt_interop_init;
+ __tgt_interop_get;
__tgt_interop_use;
- __tgt_interop_destroy;
+ __tgt_interop_release;
+ __tgt_target_sync;
__llvmPushCallConfiguration;
__llvmPopCallConfiguration;
llvmLaunchKernel;
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index d2437908a0a6f..40a428dbccb06 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -20,6 +20,7 @@
#include <vector>
#include "ExclusiveAccess.h"
+#include "OpenMP/InteropAPI.h"
#include "Shared/APITypes.h"
#include "Shared/Debug.h"
#include "Shared/Environment.h"
@@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
bool useAutoZeroCopy();
virtual bool useAutoZeroCopyImpl() { return false; }
+ virtual omp_interop_val_t *createInterop(int32_t InteropType,
+ interop_spec_t &InteropSpec) {
+ return nullptr; // default implementation creates nothing; callers must handle a null result
+ }
+
+ virtual int32_t releaseInterop(omp_interop_val_t *Interop) {
+ return OFFLOAD_SUCCESS; // default implementation does nothing with Interop
+ }
+
+ virtual interop_spec_t selectInteropPreference(int32_t InteropType,
+ int32_t NumPrefers,
+ interop_spec_t *Prefers) {
+ return interop_spec_t{omp_fr_none, {false, 0}, 0}; // default: fr_id omp_fr_none, which __tgt_interop_get treats as "request not supported"
+ }
+
/// Allocate and construct a kernel object.
virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;
@@ -1342,6 +1358,45 @@ struct GenericPluginTy {
int32_t get_function(__tgt_device_binary Binary, const char *Name,
void **KernelPtr);
+  /// Ask device \p ID which interop specification it supports, forwarding
+  /// \p InteropType, \p NumPrefers and \p Prefers unchanged. The returned
+  /// specification might not be one of the user specified ones.
+  interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType,
+                                           int32_t NumPrefers,
+                                           interop_spec_t *Prefers) {
+    return getDevice(ID).selectInteropPreference(InteropType, NumPrefers,
+                                                 Prefers);
+  }
+
+  /// Create an OpenMP interop object on device \p ID. \p InteropContext is
+  /// handed to the device as its interop type argument. \p InteropSpec is
+  /// dereferenced unconditionally, so it must not be null.
+  omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext,
+                                    interop_spec_t *InteropSpec) {
+    return getDevice(ID).createInterop(InteropContext, *InteropSpec);
+  }
+
+  /// Release the OpenMP interop object \p Interop through device \p ID and
+  /// return the status code reported by that device.
+  int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) {
+    return getDevice(ID).releaseInterop(Interop);
+  }
+
+  /// Flush the queue associated with the interop object \p Interop if
+  /// necessary. This default implementation has nothing to flush and returns
+  /// OFFLOAD_SUCCESS.
+  virtual int32_t flush_queue(omp_interop_val_t *Interop) {
+    return OFFLOAD_SUCCESS;
+  }
+
+  /// Queue a synchronous barrier in the queue associated with the interop
+  /// object \p Interop and wait for it to complete. This default
+  /// implementation performs no barrier and returns OFFLOAD_FAIL.
+  virtual int32_t sync_barrier(omp_interop_val_t *Interop) {
+    return OFFLOAD_FAIL;
+  }
+
+  /// Queue an asynchronous barrier in the queue associated with the interop
+  /// object \p Interop and return immediately. This default implementation
+  /// performs no barrier and returns OFFLOAD_FAIL.
+  virtual int32_t async_barrier(omp_interop_val_t *Interop) {
+    return OFFLOAD_FAIL;
+  }
+
private:
/// Indicates if the platform runtime has been fully initialized.
bool Initialized = false;
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index a2cacc8792b15..9c4939b029861 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {
return adjusted_gtid;
}
+#if ENABLE_LIBOMPTARGET
+// Pointer to the offload library callback that is invoked at barriers and
+// task synchronization points so the offload runtime can make progress.
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid,
+ void *current_task, void *event);
+#endif // ENABLE_LIBOMPTARGET
+
// Support for error directive
typedef enum kmp_severity_t {
severity_warning = 1,
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index d7ef57c608149..c6908c35fc3d9 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
}
#endif
+#if ENABLE_LIBOMPTARGET
+ // Give an opportunity to the offload runtime to make progress and create
+ // proxy tasks if necessary
+ if (UNLIKELY(kmp_target_sync_cb != NULL))
+ (*kmp_target_sync_cb)(
+ NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL);
+#endif
+
if (!team->t.t_serialized) {
#if USE_ITT_BUILD
// This value will be used in itt notify events below.
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 417eceb8ebecc..d99d1a410b5d3 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team,
int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
+#if ENABLE_LIBOMPTARGET
+static void __kmp_target_init(void);
+#endif // ENABLE_LIBOMPTARGET
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
@@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) {
#if KMP_MIC_SUPPORTED
__kmp_check_mic_type();
#endif
+#if ENABLE_LIBOMPTARGET
+ __kmp_target_init();
+#endif /* ENABLE_LIBOMPTARGET */
// Some global variable initialization moved here from kmp_env_initialize()
#ifdef KMP_DEBUG
@@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() {
set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}
+#if ENABLE_LIBOMPTARGET
+// Hook into libomptarget that the runtime calls to let the offload library
+// make progress. Every call site tests it against NULL first, so the hook is
+// simply inactive until __kmp_target_init() stores a non-NULL value in it.
+void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, void *current_task,
+                           void *event) = NULL;
+
+// Look for hooks in the libomptarget library.
+void __kmp_target_init() {
+  // KMP_DLSYM presumably yields a void * (dlsym-style lookup -- confirm in
+  // kmp_os.h). Storing it through a void ** view of the variable avoids a
+  // direct object-to-function pointer cast.
+  void *target_sync_sym = KMP_DLSYM("__tgt_target_sync");
+  *(void **)(&kmp_target_sync_cb) = target_sync_sym;
+}
+#endif // ENABLE_LIBOMPTARGET
+
// Empty symbols to export (see exports_so.txt) when feature is disabled
extern "C" {
#if !KMP_STATS_ENABLED
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 3d85a29423540..d45e3d690510e 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
// thread: thread data structure corresponding to implicit task
void __kmp_finish_implicit_task(kmp_info_t *thread) {
kmp_taskdata_t *task = thread->th.th_current_task;
+#if ENABLE_LIBOMPTARGET
+ // Give an opportunity to the offload runtime to synchronize any unfinished
+ // target async regions before finishing the implicit task
+ if (UNLIKELY(kmp_target_sync_cb != NULL))
+ (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid,
+ KMP_TASKDATA_TO_TASK(task), NULL);
+#endif // ENABLE_LIBOMPTARGET
if (task->td_dephash) {
int children;
task->td_flags.complete = 1;
@@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
+#if ENABLE_LIBOMPTARGET
+ // Give an opportunity to the offload runtime to make progress and create
+ // any necessary proxy tasks
+ if (UNLIKELY(kmp_target_sync_cb))
+ (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata),
+ NULL);
+#endif // ENABLE_LIBOMPTARGET
+
// Debugger: The taskwait is active. Store location and thread encountered the
// taskwait.
#if USE_ITT_BUILD
@@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
}
#endif
+#if ENABLE_LIBOMPTARGET
+ // Give an opportunity to the offload runtime to make progress and create
+ // any necessary proxy tasks
+ if (UNLIKELY(kmp_target_sync_cb))
+ (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL);
+#endif // ENABLE_LIBOMPTARGET
+
if (!taskdata->td_flags.team_serial ||
(thread->th.th_task_team != NULL &&
(thread->th.th_task_team->tt.tt_found_proxy_tasks ||
@@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template(
while (1) { // Outer loop keeps trying to find tasks in case of single thread
// getting tasks from target constructs
while (1) { // Inner loop to find a task and execute it
+#if ENABLE_LIBOMPTARGET
+ // Give an opportunity to the offload runtime to make progress
+ if (UNLIKELY(kmp_target_sync_cb))
+ (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task),
+ NULL);
+#endif // ENABLE_LIBOMPTARGET
+
task = NULL;
if (task_team->tt.tt_num_task_pri) { // get priority task first
task = __kmp_get_priority_task(gtid, task_team, is_constrained);
>From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 2 Jul 2025 13:55:30 +0200
Subject: [PATCH 2/3] Add missed ext API and minor fix
---
offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++---
offload/libomptarget/exports | 1 +
2 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index 55e47d87a865d..14b1f85802464 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop,
return OFFLOAD_FAIL;
if (Interop->interop_type == kmp_interop_type_targetsync) {
- if (Ctx->flags.nowait)
- DP("Warning: nowait flag on interop use not supported yet. "
- "Ignored\n");
- if (Deps)
+ if (Deps) {
+ if (nowait) {
+ DP("Warning: nowait flag on interop use with dependences not supported yet. "
+ "Ignored\n");
+ nowait = false;
+ }
+
__kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist,
Deps->ndeps_noalias, Deps->noalias_deplist);
+ }
}
if (Interop->async_info && Interop->async_info->Queue) {
@@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop,
return Interop->release();
}
+
+/// Add a completion callback to an interop object (ompx extension).
+///
+/// \param Interop interop object the callback is added to.
+/// \param cb callback to add.
+/// \param data pointer handed to addCompletionCb() together with \p cb.
+/// \returns omp_irc_other when offloading is disabled, omp_irc_empty when
+///          \p Interop is null, and omp_irc_success once the callback has
+///          been added.
+EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop,
+                                                ompx_interop_cb_t *cb,
+                                                void *data) {
+  DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD
+     " and data " DPxMOD "\n",
+     __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data));
+
+  if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED)
+    return omp_irc_other;
+
+  // Checked separately from the policy test so that a null interop is
+  // diagnosed and reported as omp_irc_empty instead of omp_irc_other.
+  if (!Interop) {
+    DP("Call to %s with invalid interop\n", __func__);
+    return omp_irc_empty;
+  }
+
+  Interop->addCompletionCb(cb, data);
+
+  return omp_irc_success;
+}
+
+
} // extern "C"
bool omp_interop_val_t::isCompatibleWith(int32_t InteropType,
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index b40d9b22a1be9..8e2db6ba8bba4 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -36,6 +36,7 @@ VERS1.0 {
__kmpc_push_target_tripcount;
__kmpc_push_target_tripcount_mapper;
ompx_dump_mapping_tables;
+ ompx_interop_add_completion_callback;
omp_get_mapped_ptr;
omp_get_num_devices;
omp_get_device_num;
>From 0a2825532e274fde73c3633ea471e9c9f3050f48 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 2 Jul 2025 14:13:38 +0200
Subject: [PATCH 3/3] Fix format
---
offload/libomptarget/OpenMP/InteropAPI.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index 14b1f85802464..c6413431b3e13 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -327,8 +327,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop,
if (Interop->interop_type == kmp_interop_type_targetsync) {
if (Ctx->flags.nowait)
- DP("Warning: nowait flag on interop destroy not supported yet. "
- "Ignored\n");
+ DP("Warning: nowait flag on interop destroy not supported "
+ "yet. Ignored\n");
if (Deps) {
__kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist,
Deps->ndeps_noalias, Deps->noalias_deplist);
@@ -338,7 +338,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop,
return Interop->release();
}
-
EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop,
ompx_interop_cb_t *cb,
void *data) {
@@ -359,7 +358,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop,
return omp_irc_success;
}
-
} // extern "C"
bool omp_interop_val_t::isCompatibleWith(int32_t InteropType,
More information about the Openmp-commits
mailing list