[Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491)

Alex Duran via Openmp-commits openmp-commits at lists.llvm.org
Wed Jul 2 05:13:53 PDT 2025


https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491

>From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Tue, 10 Jun 2025 10:39:29 +0200
Subject: [PATCH 1/3] [OFFLOAD][OPENMP] 6.0 compatible interop interface

The following patch introduces a new interop interface implementation with
the following characteristics:

* It supports the new 6.0 prefer_type specification
* It supports both explicit objects (from interop constructs) and
  implicit objects (from variant calls).
* Implements a per-thread reuse mechanism for implicit objects to reduce
  overheads.
* It provides a plugin interface that allows selecting the supported
  interop types, and managing all the backend related interop operations
  (init, sync, ...).
* It enables cooperation with the OpenMP runtime to allow
  progress on OpenMP synchronizations.
* It cleans up some vendor/fr_id mismatches in the current query
  routines.
* It supports an extension to define interop callbacks for library cleanup.
---
 offload/include/OpenMP/InteropAPI.h           | 149 ++++++-
 offload/include/OpenMP/omp.h                  |  51 +--
 offload/include/PerThreadTable.h              | 109 +++++
 offload/include/PluginManager.h               |   7 +-
 offload/include/Shared/APITypes.h             |   1 +
 offload/libomptarget/OffloadRTL.cpp           |   6 +
 offload/libomptarget/OpenMP/API.cpp           |  12 +
 offload/libomptarget/OpenMP/InteropAPI.cpp    | 371 ++++++++++++------
 offload/libomptarget/PluginManager.cpp        |   6 +
 offload/libomptarget/exports                  |   5 +-
 .../common/include/PluginInterface.h          |  55 +++
 openmp/runtime/src/kmp.h                      |   7 +
 openmp/runtime/src/kmp_barrier.cpp            |   8 +
 openmp/runtime/src/kmp_runtime.cpp            |  15 +
 openmp/runtime/src/kmp_tasking.cpp            |  29 ++
 15 files changed, 688 insertions(+), 143 deletions(-)
 create mode 100644 offload/include/PerThreadTable.h

diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h
index 71c78760a3226..61cbedf06a9a6 100644
--- a/offload/include/OpenMP/InteropAPI.h
+++ b/offload/include/OpenMP/InteropAPI.h
@@ -13,17 +13,70 @@
 
 #include "omp.h"
 
+#include "PerThreadTable.h"
 #include "omptarget.h"
 
 extern "C" {
 
 typedef enum kmp_interop_type_t {
   kmp_interop_type_unknown = -1,
-  kmp_interop_type_platform,
-  kmp_interop_type_device,
-  kmp_interop_type_tasksync,
+  kmp_interop_type_target,
+  kmp_interop_type_targetsync,
 } kmp_interop_type_t;
 
+struct interop_attrs_t {
+  bool inorder : 1;
+  int reserved : 31;
+
+  /* Returns true when every attribute set in this object is also set in
+     `supported`. `inorder` is the only attribute checked, so the result is
+     false only when `inorder` is set here but not in `supported`.
+  */
+  bool checkSupportedOnly(interop_attrs_t supported) const {
+    return supported.inorder || !inorder;
+  }
+};
+
+struct interop_spec_t {
+  int32_t fr_id;
+  interop_attrs_t attrs; // Common attributes
+  int64_t impl_attrs; // Implementation specific attributes (recognized by each
+                      // plugin)
+};
+
+struct interop_flags_t {
+  bool implicit : 1; // dispatch (true) or interop (false)
+  bool nowait : 1;   // has nowait flag
+  int reserved : 30;
+};
+
+struct interop_ctx_t {
+  uint16_t version; // version of the interface (current is 0)
+  interop_flags_t flags;
+  int gtid;
+};
+
+struct dep_pack_t {
+  int32_t ndeps;
+  kmp_depend_info_t *deplist;
+  int32_t ndeps_noalias;
+  kmp_depend_info_t *noalias_deplist;
+};
+
+struct omp_interop_val_t;
+
+typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data);
+
+struct omp_interop_cb_instance_t {
+  ompx_interop_cb_t *cb;
+  void *data;
+
+  omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data)
+      : cb(cb), data(data) {}
+
+  void operator()(omp_interop_val_t *interop) { cb(interop, data); }
+};
+
 /// The interop value type, aka. the interop object.
 typedef struct omp_interop_val_t {
   /// Device and interop-type are determined at construction time and fix.
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t {
   __tgt_device_info device_info;
   const kmp_interop_type_t interop_type;
   const intptr_t device_id;
-  const omp_foreign_runtime_ids_t vendor_id = cuda;
-  const intptr_t backend_type_id = omp_interop_backend_type_cuda_1;
+  omp_vendor_id_t vendor_id = omp_vendor_llvm;
+  omp_foreign_runtime_id_t fr_id = omp_fr_none;
+  interop_attrs_t attrs{false, 0}; // Common prefer specification attributes
+  int64_t impl_attrs = 0; // Implementation prefer specification attributes
+
+  void *RTLProperty = nullptr; // Plugin dependent information
+  // For implicitly created Interop objects (e.g., from a dispatch construct),
+  // the gtid of the thread that owns the object (-1 if there is no owner)
+  int OwnerGtid = -1;
+  // Marks whether the object was requested since the last time it was synced
+  bool Clean = true;
+
+  typedef llvm::SmallVector<omp_interop_cb_instance_t> callback_list_t;
+
+  callback_list_t CompletionCbs;
+
+  void reset() {
+    OwnerGtid = -1;
+    markClean();
+    clearCompletionCbs();
+  }
+
+  bool hasOwner() const { return OwnerGtid != -1; }
+
+  void setOwner(int gtid) { OwnerGtid = gtid; }
+  bool isOwnedBy(int gtid) { return OwnerGtid == gtid; }
+  bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec);
+  bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec,
+                        int64_t DeviceNum, int gtid);
+  void markClean() { Clean = true; }
+  void markDirty() { Clean = false; }
+  bool isClean() const { return Clean; }
+
+  int32_t flush(DeviceTy &Device);
+  int32_t sync_barrier(DeviceTy &Device);
+  int32_t async_barrier(DeviceTy &Device);
+  int32_t release(DeviceTy &Device);
+
+  int32_t flush();
+  int32_t syncBarrier();
+  int32_t asyncBarrier();
+  int32_t release();
+
+  void addCompletionCb(ompx_interop_cb_t *cb, void *data) {
+    CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data));
+  }
+
+  int numCompletionCbs() const { return CompletionCbs.size(); }
+  void clearCompletionCbs() { CompletionCbs.clear(); }
+
+  void runCompletionCbs() {
+    for (auto &cbInstance : CompletionCbs)
+      cbInstance(this);
+    clearCompletionCbs();
+  }
 } omp_interop_val_t;
 
 } // extern "C"
 
+struct InteropTableEntry {
+  using ContainerTy = typename std::vector<omp_interop_val_t *>;
+  using iterator = typename ContainerTy::iterator;
+
+  ContainerTy Interops;
+
+  const int reservedEntriesPerThread = 20; // reserved to avoid reallocation
+
+  void add(omp_interop_val_t *obj) {
+    if (Interops.capacity() == 0)
+      Interops.reserve(reservedEntriesPerThread);
+    Interops.push_back(obj);
+  }
+
+  // Run f on every stored object, then drop the pointers f may have released.
+  template <class ClearFuncTy> void clear(ClearFuncTy f) {
+    for (auto &Obj : Interops)
+      f(Obj);
+    Interops.clear();
+  }
+
+  /* vector interface */
+  int size() const { return Interops.size(); }
+  iterator begin() { return Interops.begin(); }
+  iterator end() { return Interops.end(); }
+  iterator erase(iterator it) { return Interops.erase(it); }
+};
+
+struct InteropTblTy
+    : public PerThreadTable<InteropTableEntry, omp_interop_val_t *> {
+  void clear();
+};
+
 #endif // OMPTARGET_OPENMP_INTEROP_API_H
diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h
index b44c6aff1b289..67b3bab9e8599 100644
--- a/offload/include/OpenMP/omp.h
+++ b/offload/include/OpenMP/omp.h
@@ -80,15 +80,18 @@ typedef enum omp_interop_rc {
   omp_irc_other = -6
 } omp_interop_rc_t;
 
-typedef enum omp_interop_fr {
-  omp_ifr_cuda = 1,
-  omp_ifr_cuda_driver = 2,
-  omp_ifr_opencl = 3,
-  omp_ifr_sycl = 4,
-  omp_ifr_hip = 5,
-  omp_ifr_level_zero = 6,
-  omp_ifr_last = 7
-} omp_interop_fr_t;
+/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */
+typedef enum omp_foreign_runtime_id_t {
+  omp_fr_none = 0,
+  omp_fr_cuda = 1,
+  omp_fr_cuda_driver = 2,
+  omp_fr_opencl = 3,
+  omp_fr_sycl = 4,
+  omp_fr_hip = 5,
+  omp_fr_level_zero = 6,
+  omp_fr_hsa = 7,
+  omp_fr_last = 8
+} omp_foreign_runtime_id_t;
 
 typedef void *omp_interop_t;
 
@@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t);
 extern const char *__KAI_KMPC_CONVENTION
 omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
 
-typedef enum omp_interop_backend_type_t {
-  // reserve 0
-  omp_interop_backend_type_cuda_1 = 1,
-} omp_interop_backend_type_t;
-
-typedef enum omp_foreign_runtime_ids {
-  cuda = 1,
-  cuda_driver = 2,
-  opencl = 3,
-  sycl = 4,
-  hip = 5,
-  level_zero = 6,
-} omp_foreign_runtime_ids_t;
+/* Vendor defined values from OpenMP Additional Definitions document v2.1 */
+typedef enum omp_vendor_id {
+  omp_vendor_unknown = 0,
+  omp_vendor_amd = 1,
+  omp_vendor_arm = 2,
+  omp_vendor_bsc = 3,
+  omp_vendor_fujitsu = 4,
+  omp_vendor_gnu = 5,
+  omp_vendor_hpe = 6,
+  omp_vendor_ibm = 7,
+  omp_vendor_intel = 8,
+  omp_vendor_llvm = 9,
+  omp_vendor_nec = 10,
+  omp_vendor_nvidia = 11,
+  omp_vendor_ti = 12,
+  omp_vendor_last = 13
+} omp_vendor_id_t;
 
 ///} InteropAPI
 
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
new file mode 100644
index 0000000000000..1e20b56c734d2
--- /dev/null
+++ b/offload/include/PerThreadTable.h
@@ -0,0 +1,109 @@
+//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Table indexed with one entry per thread.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OFFLOAD_PERTHREADTABLE_H
+#define OFFLOAD_PERTHREADTABLE_H
+
+#include <list>
+#include <memory>
+#include <mutex>
+
+// Using an STL container (such as std::vector) indexed by thread ID has
+// too many race-condition issues, so we store each thread's entry in a
+// thread_local variable.
+// ContainerType is the container used by each thread to store its objects
+// (e.g., std::vector, std::set, ...). ObjectType is the type of the stored
+// objects (e.g., omp_interop_val_t *).
+
+template <typename ContainerType, typename ObjectType> struct PerThreadTable {
+  using iterator = typename ContainerType::iterator;
+
+  // Per-thread bookkeeping: the number of stored objects and the lazily
+  // allocated container that holds them.
+  struct PerThreadData {
+    size_t NElements = 0;
+    std::unique_ptr<ContainerType> ThEntry;
+  };
+
+  std::mutex Mtx; // protects ThreadDataList
+  std::list<PerThreadData *> ThreadDataList;
+
+  // define default constructors, disable copy and move constructors
+  PerThreadTable() = default;
+  PerThreadTable(const PerThreadTable &) = delete;
+  PerThreadTable(PerThreadTable &&) = delete;
+  PerThreadTable &operator=(const PerThreadTable &) = delete;
+  PerThreadTable &operator=(PerThreadTable &&) = delete;
+  ~PerThreadTable() {
+    std::lock_guard<std::mutex> Lock(Mtx);
+    ThreadDataList.clear();
+  }
+
+private:
+  PerThreadData &getThreadData() {
+    static thread_local PerThreadData ThData;
+    return ThData;
+  }
+
+protected:
+  // Returns the calling thread's container. It is allocated on first use
+  // and, at that point, the thread is registered in ThreadDataList.
+  ContainerType &getThreadEntry() {
+    auto &ThData = getThreadData();
+    if (ThData.ThEntry)
+      return *ThData.ThEntry;
+    ThData.ThEntry = std::make_unique<ContainerType>();
+    std::lock_guard<std::mutex> Lock(Mtx);
+    ThreadDataList.push_back(&ThData);
+    return *ThData.ThEntry;
+  }
+
+  size_t &getThreadNElements() { return getThreadData().NElements; }
+
+public:
+  void add(ObjectType obj) {
+    auto &Entry = getThreadEntry();
+    auto &NElements = getThreadNElements();
+    NElements++;
+    Entry.add(obj);
+  }
+
+  iterator erase(iterator it) {
+    auto &Entry = getThreadEntry();
+    auto &NElements = getThreadNElements();
+    NElements--;
+    return Entry.erase(it);
+  }
+
+  size_t size() { return getThreadNElements(); }
+
+  // Iterators to traverse objects owned by
+  // the current thread
+  iterator begin() { return getThreadEntry().begin(); }
+  iterator end() { return getThreadEntry().end(); }
+
+  // Applies f to the objects of every registered thread and empties the
+  // table. Entries are destroyed, not just unregistered, so a thread that
+  // uses the table again allocates a new entry and registers once more.
+  // NOTE(review): assumes no other thread is using its entry concurrently.
+  template <class F> void clear(F f) {
+    std::lock_guard<std::mutex> Lock(Mtx);
+    for (auto ThData : ThreadDataList) {
+      ThData->ThEntry->clear(f);
+      ThData->ThEntry.reset();
+      ThData->NElements = 0;
+    }
+    ThreadDataList.clear();
+  }
+};
+
+#endif
diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h
index ec3adadf0819b..ea1f3b6406ce7 100644
--- a/offload/include/PluginManager.h
+++ b/offload/include/PluginManager.h
@@ -35,6 +35,8 @@
 #include <mutex>
 #include <string>
 
+#include "OpenMP/InteropAPI.h"
+
 using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy;
 
 /// Struct for the data required to handle plugins
@@ -88,6 +90,9 @@ struct PluginManager {
   HostPtrToTableMapTy HostPtrToTableMap;
   std::mutex TblMapMtx; ///< For HostPtrToTableMap
 
+  /// Table of cached implicit interop objects
+  InteropTblTy InteropTbl;
+
   // Work around for plugins that call dlopen on shared libraries that call
   // tgt_register_lib during their initialisation. Stash the pointers in a
   // vector until the plugins are all initialised and then register them.
@@ -185,5 +190,5 @@ void initRuntime();
 void deinitRuntime();
 
 extern PluginManager *PM;
-
+extern std::atomic<bool> RTLAlive; // Indicates if the RTL has been initialized
 #endif // OMPTARGET_PLUGIN_MANAGER_H
diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h
index 978b53d5d69b9..f376c7dc861f9 100644
--- a/offload/include/Shared/APITypes.h
+++ b/offload/include/Shared/APITypes.h
@@ -36,6 +36,7 @@ struct __tgt_device_image {
 struct __tgt_device_info {
   void *Context = nullptr;
   void *Device = nullptr;
+  void *Platform = nullptr;
 };
 
 /// This struct is a record of all the host code that may be offloaded to a
diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp
index 29b573a27d087..134ab7c95ac0b 100644
--- a/offload/libomptarget/OffloadRTL.cpp
+++ b/offload/libomptarget/OffloadRTL.cpp
@@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary();
 
 static std::mutex PluginMtx;
 static uint32_t RefCount = 0;
+std::atomic<bool> RTLAlive{false};
 
 void initRuntime() {
   std::scoped_lock<decltype(PluginMtx)> Lock(PluginMtx);
@@ -41,6 +42,9 @@ void initRuntime() {
 
     PM->init();
     PM->registerDelayedLibraries();
+
+    // RTL initialization is complete
+    RTLAlive = true;
   }
 }
 
@@ -50,6 +54,8 @@ void deinitRuntime() {
 
   if (RefCount == 1) {
     DP("Deinit offload library!\n");
+    // RTL deinitialization has started
+    RTLAlive = false;
     PM->deinit();
     delete PM;
     PM = nullptr;
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index 4576f9bd06121..f61f56772504b 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) {
 
   return TPR.TargetPointer;
 }
+
+void syncImplicitInterops(int gtid, void *event);
+// This routine gets called from the Host RTL at sync points (taskwait, barrier,
+// ...) so we can synchronize the necessary objects from the offload side.
+EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task,
+                              void *event) {
+
+  if (!RTLAlive)
+    return;
+
+  syncImplicitInterops(gtid, event);
+}
diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index bdbc440c64a2c..55e47d87a865d 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -10,6 +10,7 @@
 #include "OpenMP/InternalTypes.h"
 #include "OpenMP/omp.h"
 
+#include "OffloadPolicy.h"
 #include "PluginManager.h"
 #include "device.h"
 #include "omptarget.h"
@@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) {
     *Err = getPropertyErrorType(Property);
 }
 
-const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) {
-  switch (VendorId) {
-  case cuda:
-    return ("cuda");
-  case cuda_driver:
-    return ("cuda_driver");
-  case opencl:
-    return ("opencl");
-  case sycl:
-    return ("sycl");
-  case hip:
-    return ("hip");
-  case level_zero:
-    return ("level_zero");
-  }
-  return ("unknown");
+static const char *VendorStrTbl[] = {
+    "unknown", "amd",   "arm",  "bsc", "fujitsu", "gnu", "hpe",
+    "ibm",     "intel", "llvm", "nec", "nvidia",  "ti"};
+const char *getVendorIdToStr(const omp_vendor_id_t VendorId) {
+  if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last)
+    return ("unknown");
+  return VendorStrTbl[VendorId];
+}
+
+static const char *ForeignRuntimeStrTbl[] = {
+    "none", "cuda", "cuda_driver", "opencl",
+    "sycl", "hip",  "level_zero",  "hsa"};
+const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) {
+  if (FrId < omp_fr_none || FrId >= omp_fr_last)
+    return ("unknown");
+  return ForeignRuntimeStrTbl[FrId];
 }
 
 template <typename PropertyTy>
@@ -83,7 +84,7 @@ intptr_t getProperty<intptr_t>(omp_interop_val_t &InteropVal,
                                omp_interop_property_t Property, int *Err) {
   switch (Property) {
   case omp_ipr_fr_id:
-    return InteropVal.backend_type_id;
+    return InteropVal.fr_id;
   case omp_ipr_vendor:
     return InteropVal.vendor_id;
   case omp_ipr_device_num:
@@ -99,10 +100,8 @@ const char *getProperty<const char *>(omp_interop_val_t &InteropVal,
                                       omp_interop_property_t Property,
                                       int *Err) {
   switch (Property) {
-  case omp_ipr_fr_id:
-    return InteropVal.interop_type == kmp_interop_type_tasksync
-               ? "tasksync"
-               : "device+context";
+  case omp_ipr_fr_name:
+    return getForeignRuntimeIdToStr(InteropVal.fr_id);
   case omp_ipr_vendor_name:
     return getVendorIdToStr(InteropVal.vendor_id);
   default:
@@ -120,6 +119,8 @@ void *getProperty<void *>(omp_interop_val_t &InteropVal,
       return InteropVal.device_info.Device;
     *Err = omp_irc_no_value;
     return const_cast<char *>(InteropVal.err_str);
+  case omp_ipr_platform:
+    return InteropVal.device_info.Platform;
   case omp_ipr_device_context:
     return InteropVal.device_info.Context;
   case omp_ipr_targetsync:
@@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr,
     return false;
   }
   if (Property == omp_ipr_targetsync &&
-      (*InteropPtr)->interop_type != kmp_interop_type_tasksync) {
+      (*InteropPtr)->interop_type != kmp_interop_type_targetsync) {
     if (Err)
       *Err = omp_irc_other;
     return false;
   }
   if ((Property == omp_ipr_device || Property == omp_ipr_device_context) &&
-      (*InteropPtr)->interop_type == kmp_interop_type_tasksync) {
+      (*InteropPtr)->interop_type == kmp_interop_type_targetsync) {
     if (Err)
       *Err = omp_irc_other;
     return false;
@@ -166,7 +167,7 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr,
                                        omp_interop_property_t property_id,     \
                                        int *err) {                             \
     omp_interop_val_t *interop_val = (omp_interop_val_t *)interop;             \
-    assert((interop_val)->interop_type == kmp_interop_type_tasksync);          \
+    assert((interop_val)->interop_type == kmp_interop_type_targetsync);        \
     if (!getPropertyCheck(&interop_val, property_id, err)) {                   \
       return (RETURN_TYPE)(0);                                                 \
     }                                                                          \
@@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc)
 __OMP_GET_INTEROP_TY3(const char *, rc_desc)
 #undef __OMP_GET_INTEROP_TY3
 
-static const char *copyErrorString(llvm::Error &&Err) {
-  // TODO: Use the error string while avoiding leaks.
-  std::string ErrMsg = llvm::toString(std::move(Err));
-  char *UsrMsg = reinterpret_cast<char *>(malloc(ErrMsg.size() + 1));
-  strcpy(UsrMsg, ErrMsg.c_str());
-  return UsrMsg;
-}
-
 extern "C" {
 
-void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
-                        omp_interop_val_t *&InteropPtr,
-                        kmp_interop_type_t InteropType, int32_t DeviceId,
-                        int32_t Ndeps, kmp_depend_info_t *DepList,
-                        int32_t HaveNowait) {
-  int32_t NdepsNoalias = 0;
-  kmp_depend_info_t *NoaliasDepList = NULL;
-  assert(InteropType != kmp_interop_type_unknown &&
-         "Cannot initialize with unknown interop_type!");
-  if (DeviceId == -1) {
-    DeviceId = omp_get_default_device();
+omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType,
+                                     int64_t DeviceNum, int32_t NumPrefers,
+                                     interop_spec_t *Prefers,
+                                     interop_ctx_t *Ctx, dep_pack_t *Deps) {
+  // Returns an interop object for DeviceNum, or omp_interop_none on failure.
+  DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32
+     ", number of preferred specs %" PRId32 "%s%s\n",
+     __func__, DeviceNum, InteropType, NumPrefers,
+     Ctx->flags.implicit ? " (implicit)" : "",
+     Ctx->flags.nowait ? " (nowait)" : "");
+
+  if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED)
+    return omp_interop_none;
+
+  // Now, try to create an interop with device_num.
+  if (DeviceNum == OFFLOAD_DEVICE_DEFAULT)
+    DeviceNum = omp_get_default_device();
+
+  auto gtid = Ctx->gtid;
+
+  if (InteropType == kmp_interop_type_targetsync) {
+    if (Ctx->flags.nowait)
+      DP("Warning: nowait flag on interop creation not supported yet. "
+         "Ignored\n");
+    if (Deps)
+      __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist,
+                           Deps->ndeps_noalias, Deps->noalias_deplist);
   }
 
-  if (InteropType == kmp_interop_type_tasksync) {
-    __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias,
-                         NoaliasDepList);
+  auto DeviceOrErr = PM->getDevice(DeviceNum);
+  if (!DeviceOrErr) {
+    [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError());
+    DP("Couldn't find device %" PRId64
+       " while constructing interop object: %s\n",
+       DeviceNum, ErrStr.c_str());
+    return omp_interop_none;
+  }
+  auto &Device = *DeviceOrErr;
+  omp_interop_val_t *Interop = omp_interop_none;
+  auto InteropSpec = Device.RTL->select_interop_preference(
+      DeviceNum, InteropType, NumPrefers, Prefers);
+  if (InteropSpec.fr_id == omp_fr_none) {
+    DP("Interop request not supported by device %" PRId64 "\n", DeviceNum);
+    return omp_interop_none;
+  }
+  DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n",
+     getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id),
+     InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs);
+
+  if (Ctx->flags.implicit) {
+    // This is a request for an RTL managed interop object.
+    // Get it from the InteropTbl if possible
+    if (PM->InteropTbl.size() > 0) {
+      for (auto iop : PM->InteropTbl) {
+        if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) {
+          Interop = iop;
+          Interop->markDirty();
+          DP("Reused interop " DPxMOD " from device number %" PRId64
+             " for gtid %" PRId32 "\n",
+             DPxPTR(Interop), DeviceNum, gtid);
+          return Interop;
+        }
+      }
+    }
   }
 
-  InteropPtr = new omp_interop_val_t(DeviceId, InteropType);
-
-  auto DeviceOrErr = PM->getDevice(DeviceId);
-  if (!DeviceOrErr) {
-    InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError());
-    return;
+  Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec);
+  DP("Created an interop " DPxMOD " from device number %" PRId64 "\n",
+     DPxPTR(Interop), DeviceNum);
+  if (!Interop) // the plugin could not create the object
+    return omp_interop_none;
+  // Explicit interops have no owner; implicit ones belong to the requesting
+  // thread and are registered in the RTL so they can be reused later.
+  Interop->setOwner(Ctx->flags.implicit ? gtid : -1);
+  if (Ctx->flags.implicit) {
+    Interop->markDirty();
+    PM->InteropTbl.add(Interop);
+  }
 
-  DeviceTy &Device = *DeviceOrErr;
-  if (!Device.RTL ||
-      Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info,
-                                   &(InteropPtr)->err_str)) {
-    delete InteropPtr;
-    InteropPtr = omp_interop_none;
+  return Interop;
+}
+
+int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop,
+                      interop_ctx_t *Ctx, dep_pack_t *Deps) {
+  bool nowait = Ctx->flags.nowait;
+  DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__,
+     DPxPTR(Interop), nowait);
+  if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop)
+    return OFFLOAD_FAIL;
+
+  if (!Interop)
+    return OFFLOAD_FAIL;
+
+  if (Interop->interop_type == kmp_interop_type_targetsync) {
+    if (Ctx->flags.nowait)
+      DP("Warning: nowait flag on interop use not supported yet. "
+         "Ignored\n");
+    if (Deps)
+      __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist,
+                           Deps->ndeps_noalias, Deps->noalias_deplist);
   }
-  if (InteropType == kmp_interop_type_tasksync) {
-    if (!Device.RTL ||
-        Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) {
-      delete InteropPtr;
-      InteropPtr = omp_interop_none;
+
+  if (Interop->async_info && Interop->async_info->Queue) {
+    if (nowait)
+      Interop->asyncBarrier();
+    else {
+      Interop->flush();
+      Interop->syncBarrier();
+      Interop->markClean();
     }
   }
+
+  return OFFLOAD_SUCCESS;
 }
 
-void __tgt_interop_use(ident_t *LocRef, int32_t Gtid,
-                       omp_interop_val_t *&InteropPtr, int32_t DeviceId,
-                       int32_t Ndeps, kmp_depend_info_t *DepList,
-                       int32_t HaveNowait) {
-  int32_t NdepsNoalias = 0;
-  kmp_depend_info_t *NoaliasDepList = NULL;
-  assert(InteropPtr && "Cannot use nullptr!");
-  omp_interop_val_t *InteropVal = InteropPtr;
-  if (DeviceId == -1) {
-    DeviceId = omp_get_default_device();
-  }
-  assert(InteropVal != omp_interop_none &&
-         "Cannot use uninitialized interop_ptr!");
-  assert((DeviceId == -1 || InteropVal->device_id == DeviceId) &&
-         "Inconsistent device-id usage!");
+int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop,
+                          interop_ctx_t *Ctx, dep_pack_t *Deps) {
+  DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop));
 
-  auto DeviceOrErr = PM->getDevice(DeviceId);
-  if (!DeviceOrErr) {
-    InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError());
-    return;
+  if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop)
+    return OFFLOAD_FAIL;
+
+  if (!Interop)
+    return OFFLOAD_FAIL;
+
+  if (Interop->interop_type == kmp_interop_type_targetsync) {
+    if (Ctx->flags.nowait)
+      DP("Warning: nowait flag on interop destroy not supported yet. "
+         "Ignored\n");
+    if (Deps) {
+      __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist,
+                           Deps->ndeps_noalias, Deps->noalias_deplist);
+    }
   }
 
-  if (InteropVal->interop_type == kmp_interop_type_tasksync) {
-    __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias,
-                         NoaliasDepList);
+  return Interop->release();
+}
+
+} // extern "C"
+
+bool omp_interop_val_t::isCompatibleWith(int32_t InteropType,
+                                         const interop_spec_t &Spec) {
+  // Returns true only if this object matches the request exactly: same
+  // interop type, same foreign runtime, same inorder attribute and same
+  // implementation specific attributes. Other common attributes (the
+  // reserved bits) are not compared.
+  const bool SameType = interop_type == InteropType;
+  const bool SameRuntime = Spec.fr_id == fr_id;
+  const bool SameInorder = Spec.attrs.inorder == attrs.inorder;
+  const bool SameImplAttrs = Spec.impl_attrs == impl_attrs;
+
+  return SameType && SameRuntime && SameInorder && SameImplAttrs;
+}
+
+bool omp_interop_val_t::isCompatibleWith(int32_t InteropType,
+                                         const interop_spec_t &Spec,
+                                         int64_t DeviceNum, int GTID) {
+  // The object must belong to the requested device and be owned by the
+  // requesting thread before the type/spec comparison is even attempted.
+  const bool SameDevice = device_id == DeviceNum;
+  const bool SameOwner = GTID == OwnerGtid;
+  if (!SameDevice || !SameOwner)
+    return false;
+  return isCompatibleWith(InteropType, Spec);
+}
+
+int32_t omp_interop_val_t::flush(DeviceTy &Device) {
+  return Device.RTL->flush_queue(this);
+}
+
+int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) {
+  if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) {
+    FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n",
+                  this);
   }
-  // TODO Flush the queue associated with the interop through the plugin
+  DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this));
+  runCompletionCbs();
+  return OFFLOAD_SUCCESS;
+}
+
+int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) {
+  return Device.RTL->async_barrier(this);
 }
 
-void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid,
-                           omp_interop_val_t *&InteropPtr, int32_t DeviceId,
-                           int32_t Ndeps, kmp_depend_info_t *DepList,
-                           int32_t HaveNowait) {
-  int32_t NdepsNoalias = 0;
-  kmp_depend_info_t *NoaliasDepList = NULL;
-  assert(InteropPtr && "Cannot use nullptr!");
-  omp_interop_val_t *InteropVal = InteropPtr;
-  if (DeviceId == -1) {
-    DeviceId = omp_get_default_device();
+int32_t omp_interop_val_t::release(DeviceTy &Device) {
+  if (async_info != nullptr && (!hasOwner() || !isClean())) {
+    flush(Device);        // reuse the caller-provided device rather than
+    sync_barrier(Device); // looking it up again by device_id
   }
+  return Device.RTL->release_interop(device_id, this);
+}
 
-  if (InteropVal == omp_interop_none)
-    return;
+int32_t omp_interop_val_t::flush() {
+  auto DeviceOrErr = PM->getDevice(device_id);
+  if (!DeviceOrErr)
+    FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str());
+  DeviceTy &Device = *DeviceOrErr;
+  return flush(Device);
+}
 
-  assert((DeviceId == -1 || InteropVal->device_id == DeviceId) &&
-         "Inconsistent device-id usage!");
-  auto DeviceOrErr = PM->getDevice(DeviceId);
-  if (!DeviceOrErr) {
-    InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError());
+int32_t omp_interop_val_t::syncBarrier() {
+  auto DeviceOrErr = PM->getDevice(device_id);
+  if (!DeviceOrErr)
+    FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str());
+  DeviceTy &Device = *DeviceOrErr;
+  return sync_barrier(Device);
+}
+
+int32_t omp_interop_val_t::asyncBarrier() {
+  auto DeviceOrErr = PM->getDevice(device_id);
+  if (!DeviceOrErr)
+    FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str());
+  DeviceTy &Device = *DeviceOrErr;
+  return async_barrier(Device);
+}
+
+int32_t omp_interop_val_t::release() {
+  auto DeviceOrErr = PM->getDevice(device_id);
+  if (!DeviceOrErr)
+    FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str());
+  DeviceTy &Device = *DeviceOrErr;
+  return release(Device);
+}
+
+void syncImplicitInterops(int gtid, void *event) {
+  if (PM->InteropTbl.size() == 0)
     return;
-  }
 
-  if (InteropVal->interop_type == kmp_interop_type_tasksync) {
-    __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias,
-                         NoaliasDepList);
+  DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n",
+     gtid, DPxPTR(event));
+
+  for (auto iop : PM->InteropTbl) {
+    if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) &&
+        !iop->isClean()) {
+
+      iop->flush();
+      iop->syncBarrier();
+      iop->markClean();
+
+      // TODO: Alternate implementation option
+      // Instead of using a synchronous barrier, queue an asynchronous
+      // barrier and create a proxy task associated to the event to handle
+      // OpenMP synchronizations.
+      // When the event is completed, fulfill the proxy task to notify the
+      // OpenMP runtime.
+      // event = iop->asyncBarrier();
+      // ptask = createProxyTask();
+      // Events->add(event,ptask);
+    }
   }
-  // TODO Flush the queue associated with the interop through the plugin
-  // TODO Signal out dependences
-
-  delete InteropPtr;
-  InteropPtr = omp_interop_none;
+  // This would be needed for the alternate implementation
+  // processEvents();
 }
 
-} // extern "C"
+void InteropTblTy::clear() {
+  DP("Clearing Interop Table\n");
+  PerThreadTable::clear([](auto &IOP) { IOP->release(); });
+}
diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp
index 93589960a426d..2cc1314e7a4f0 100644
--- a/offload/libomptarget/PluginManager.cpp
+++ b/offload/libomptarget/PluginManager.cpp
@@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() {
       initializeDevice(Plugin, DeviceId);
     }
   }
+  // After all plugins are initialized, register atExit cleanup handlers
+  std::atexit([]() {
+    // Interop cleanup should be done before the plugins are deinitialized;
+    // otherwise the backend libraries may already be unloaded.
+    PM->InteropTbl.clear();
+  });
 }
 
 // Returns a pointer to the binary descriptor, upgrading from a legacy format if
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 2406776c1fb5f..b40d9b22a1be9 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -67,9 +67,10 @@ VERS1.0 {
     omp_get_interop_int;
     omp_get_interop_name;
     omp_get_interop_type_desc;
-    __tgt_interop_init;
+    __tgt_interop_get;
     __tgt_interop_use;
-    __tgt_interop_destroy;
+    __tgt_interop_release;
+    __tgt_target_sync;
     __llvmPushCallConfiguration;
     __llvmPopCallConfiguration;
     llvmLaunchKernel;
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index d2437908a0a6f..40a428dbccb06 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -20,6 +20,7 @@
 #include <vector>
 
 #include "ExclusiveAccess.h"
+#include "OpenMP/InteropAPI.h"
 #include "Shared/APITypes.h"
 #include "Shared/Debug.h"
 #include "Shared/Environment.h"
@@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   bool useAutoZeroCopy();
   virtual bool useAutoZeroCopyImpl() { return false; }
 
+  virtual omp_interop_val_t *createInterop(int32_t InteropType,
+                                           interop_spec_t &InteropSpec) {
+    return nullptr;
+  }
+
+  virtual int32_t releaseInterop(omp_interop_val_t *Interop) {
+    return OFFLOAD_SUCCESS;
+  }
+
+  virtual interop_spec_t selectInteropPreference(int32_t InteropType,
+                                                 int32_t NumPrefers,
+                                                 interop_spec_t *Prefers) {
+    return interop_spec_t{omp_fr_none, {false, 0}, 0};
+  }
+
   /// Allocate and construct a kernel object.
   virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;
 
@@ -1342,6 +1358,45 @@ struct GenericPluginTy {
   int32_t get_function(__tgt_device_binary Binary, const char *Name,
                        void **KernelPtr);
 
+  /// Return the interop specification that the plugin supports.
+  /// It might not be one of the user-specified ones.
+  interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType,
+                                           int32_t NumPrefers,
+                                           interop_spec_t *Prefers) {
+    auto &Device = getDevice(ID);
+    return Device.selectInteropPreference(InteropType, NumPrefers, Prefers);
+  }
+
+  /// Create OpenMP interop with the given interop context
+  omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext,
+                                    interop_spec_t *InteropSpec) {
+    auto &Device = getDevice(ID);
+    return Device.createInterop(InteropContext, *InteropSpec);
+  }
+
+  /// Release OpenMP interop object
+  int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) {
+    auto &Device = getDevice(ID);
+    return Device.releaseInterop(Interop);
+  }
+
+  /// Flush the queue associated with the interop object if necessary
+  virtual int32_t flush_queue(omp_interop_val_t *Interop) {
+    return OFFLOAD_SUCCESS;
+  }
+
+  /// Queue a synchronous barrier in the queue associated with the interop
+  /// object and wait for it to complete.
+  virtual int32_t sync_barrier(omp_interop_val_t *Interop) {
+    return OFFLOAD_FAIL;
+  }
+
+  /// Queue an asynchronous barrier in the queue associated with the interop
+  /// object and return immediately.
+  virtual int32_t async_barrier(omp_interop_val_t *Interop) {
+    return OFFLOAD_FAIL;
+  }
+
 private:
   /// Indicates if the platform runtime has been fully initialized.
   bool Initialized = false;
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index a2cacc8792b15..9c4939b029861 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {
   return adjusted_gtid;
 }
 
+#if ENABLE_LIBOMPTARGET
+// Pointers to callbacks registered by the offload library to be notified of
+// task progress.
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid,
+                                  void *current_task, void *event);
+#endif // ENABLE_LIBOMPTARGET
+
 // Support for error directive
 typedef enum kmp_severity_t {
   severity_warning = 1,
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index d7ef57c608149..c6908c35fc3d9 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
   }
 #endif
 
+#if ENABLE_LIBOMPTARGET
+  // Give an opportunity to the offload runtime to make progress and create
+  // proxy tasks if necessary
+  if (UNLIKELY(kmp_target_sync_cb != NULL))
+    (*kmp_target_sync_cb)(
+        NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL);
+#endif
+
   if (!team->t.t_serialized) {
 #if USE_ITT_BUILD
     // This value will be used in itt notify events below.
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 417eceb8ebecc..d99d1a410b5d3 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team,
                                    int update_master_only = 0);
 #endif
 static void __kmp_do_serial_initialize(void);
+#if ENABLE_LIBOMPTARGET
+static void __kmp_target_init(void);
+#endif // ENABLE_LIBOMPTARGET
 void __kmp_fork_barrier(int gtid, int tid);
 void __kmp_join_barrier(int gtid);
 void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
@@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) {
 #if KMP_MIC_SUPPORTED
   __kmp_check_mic_type();
 #endif
+#if ENABLE_LIBOMPTARGET
+  __kmp_target_init();
+#endif /* ENABLE_LIBOMPTARGET */
 
 // Some global variable initialization moved here from kmp_env_initialize()
 #ifdef KMP_DEBUG
@@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() {
     set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
 }
 
+#if ENABLE_LIBOMPTARGET
+void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, void *current_task,
+                           void *event) = NULL;
+void __kmp_target_init() {
+  // Look for hooks in the libomptarget library
+  *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync");
+}
+#endif // ENABLE_LIBOMPTARGET
+
 // Empty symbols to export (see exports_so.txt) when feature is disabled
 extern "C" {
 #if !KMP_STATS_ENABLED
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 3d85a29423540..d45e3d690510e 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
 // thread:  thread data structure corresponding to implicit task
 void __kmp_finish_implicit_task(kmp_info_t *thread) {
   kmp_taskdata_t *task = thread->th.th_current_task;
+#if ENABLE_LIBOMPTARGET
+  // Give an opportunity to the offload runtime to synchronize any unfinished
+  // target async regions before finishing the implicit task
+  if (UNLIKELY(kmp_target_sync_cb != NULL))
+    (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid,
+                          KMP_TASKDATA_TO_TASK(task), NULL);
+#endif // ENABLE_LIBOMPTARGET
   if (task->td_dephash) {
     int children;
     task->td_flags.complete = 1;
@@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
     }
 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
 
+#if ENABLE_LIBOMPTARGET
+    // Give an opportunity to the offload runtime to make progress and create
+    // any necessary proxy tasks
+    if (UNLIKELY(kmp_target_sync_cb))
+      (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata),
+                            NULL);
+#endif // ENABLE_LIBOMPTARGET
+
 // Debugger: The taskwait is active. Store location and thread encountered the
 // taskwait.
 #if USE_ITT_BUILD
@@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
     }
 #endif
 
+#if ENABLE_LIBOMPTARGET
+    // Give an opportunity to the offload runtime to make progress and create
+    // any necessary proxy tasks
+    if (UNLIKELY(kmp_target_sync_cb))
+      (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL);
+#endif // ENABLE_LIBOMPTARGET
+
     if (!taskdata->td_flags.team_serial ||
         (thread->th.th_task_team != NULL &&
          (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
@@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template(
   while (1) { // Outer loop keeps trying to find tasks in case of single thread
     // getting tasks from target constructs
     while (1) { // Inner loop to find a task and execute it
+#if ENABLE_LIBOMPTARGET
+      // Give an opportunity to the offload runtime to make progress
+      if (UNLIKELY(kmp_target_sync_cb))
+        (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task),
+                              NULL);
+#endif // ENABLE_LIBOMPTARGET
+
       task = NULL;
       if (task_team->tt.tt_num_task_pri) { // get priority task first
         task = __kmp_get_priority_task(gtid, task_team, is_constrained);

>From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 2 Jul 2025 13:55:30 +0200
Subject: [PATCH 2/3] Add missed ext API and minor fix

---
 offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++---
 offload/libomptarget/exports               |  1 +
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index 55e47d87a865d..14b1f85802464 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop,
     return OFFLOAD_FAIL;
 
   if (Interop->interop_type == kmp_interop_type_targetsync) {
-    if (Ctx->flags.nowait)
-      DP("Warning: nowait flag on interop use not supported yet. "
-         "Ignored\n");
-    if (Deps)
+    if (Deps) {
+      if (nowait) {
+        DP("Warning: nowait flag on interop use with dependences not supported yet. "
+           "Ignored\n");
+        nowait = false;
+      }
+
       __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist,
                            Deps->ndeps_noalias, Deps->noalias_deplist);
+    }
   }
 
   if (Interop->async_info && Interop->async_info->Queue) {
@@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop,
   return Interop->release();
 }
 
+
+EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop,
+                                                ompx_interop_cb_t *cb,
+                                                void *data) {
+  DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD
+     " and data " DPxMOD "\n",
+     __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data));
+
+  if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED)
+    return omp_irc_other;
+
+  // Check Interop on its own so a null object yields omp_irc_empty.
+  if (!Interop) {
+    DP("Call to %s with invalid interop\n", __func__);
+    return omp_irc_empty;
+  }
+
+  Interop->addCompletionCb(cb, data);
+  return omp_irc_success;
+}
+
+
 } // extern "C"
 
 bool omp_interop_val_t::isCompatibleWith(int32_t InteropType,
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index b40d9b22a1be9..8e2db6ba8bba4 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -36,6 +36,7 @@ VERS1.0 {
     __kmpc_push_target_tripcount;
     __kmpc_push_target_tripcount_mapper;
     ompx_dump_mapping_tables;
+    ompx_interop_add_completion_callback;
     omp_get_mapped_ptr;
     omp_get_num_devices;
     omp_get_device_num;

>From 0a2825532e274fde73c3633ea471e9c9f3050f48 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 2 Jul 2025 14:13:38 +0200
Subject: [PATCH 3/3] Fix format

---
 offload/libomptarget/OpenMP/InteropAPI.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index 14b1f85802464..c6413431b3e13 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -327,8 +327,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop,
 
   if (Interop->interop_type == kmp_interop_type_targetsync) {
     if (Ctx->flags.nowait)
-      DP("Warning: nowait flag on interop destroy not supported yet. "
-         "Ignored\n");
+      DP("Warning: nowait flag on interop destroy not supported "
+         "yet. Ignored\n");
     if (Deps) {
       __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist,
                            Deps->ndeps_noalias, Deps->noalias_deplist);
@@ -338,7 +338,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop,
   return Interop->release();
 }
 
-
 EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop,
                                                 ompx_interop_cb_t *cb,
                                                 void *data) {
@@ -359,7 +358,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop,
   return omp_irc_success;
 }
 
-
 } // extern "C"
 
 bool omp_interop_val_t::isCompatibleWith(int32_t InteropType,



More information about the Openmp-commits mailing list