[Openmp-commits] [openmp] f2c3859 - [OpenMP] Remove shadow pointer map and introduce consistent locking

Johannes Doerfert via Openmp-commits openmp-commits at lists.llvm.org
Tue Mar 21 19:17:13 PDT 2023


Author: Johannes Doerfert
Date: 2023-03-21T19:16:27-07:00
New Revision: f2c385934b0cb5fee14e62528497f76a9a534d77

URL: https://github.com/llvm/llvm-project/commit/f2c385934b0cb5fee14e62528497f76a9a534d77
DIFF: https://github.com/llvm/llvm-project/commit/f2c385934b0cb5fee14e62528497f76a9a534d77.diff

LOG: [OpenMP] Remove shadow pointer map and introduce consistent locking

The shadow pointer map was problematic as we scanned an entire list if
an entry had shadow pointers. The new scheme stores the shadow pointers
inside the entries. This allows easy access without any search. It also
helps us, but also makes it necessary, to define a consistent locking
scheme. The implicit locking of entries via TargetPointerResultTy makes
this pretty effortless, however one has to:

- Lock HDTTMap before locking an entry.
- Do not lock HDTTMap while holding an entry lock.
- Hold the entry lock to read or modify an entry.

The changes to submitData and retrieveData have been made to ensure 2
when the LIBOMPTARGET_INFO flag is used. Most everything else happens by
itself as TargetPointerResultTy acts as a lock_guard for the entry. It
is a little complicated when we deal with multiple entries, especially
as they can be equal. However, one can still follow the rules with
reasonable effort.

LookupResult are now finally also locking the entry before it is
inspected. This is good even if we haven't run into a problem yet.

Differential Revision: https://reviews.llvm.org/D123446

Added: 
    

Modified: 
    openmp/libomptarget/include/device.h
    openmp/libomptarget/src/api.cpp
    openmp/libomptarget/src/device.cpp
    openmp/libomptarget/src/omptarget.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
index f1fbc287d017f..695f864b5ec66 100644
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
+#include <cstring>
 #include <list>
 #include <map>
 #include <memory>
@@ -26,6 +27,7 @@
 #include "ExclusiveAccess.h"
 #include "omptarget.h"
 #include "rtl.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 
 // Forward declarations.
@@ -43,6 +45,22 @@ enum kmp_target_offload_kind {
 };
 typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
 
+/// Information about shadow pointers.
+struct ShadowPtrInfoTy {
+  void **HstPtrAddr = nullptr;
+  void *HstPtrVal = nullptr;
+  void **TgtPtrAddr = nullptr;
+  void *TgtPtrVal = nullptr;
+
+  bool operator==(const ShadowPtrInfoTy &Other) const {
+    return HstPtrAddr == Other.HstPtrAddr;
+  }
+};
+
+inline bool operator<(const ShadowPtrInfoTy &lhs, const ShadowPtrInfoTy &rhs) {
+  return lhs.HstPtrAddr < rhs.HstPtrAddr;
+}
+
 /// Map between host data and target data.
 struct HostDataToTargetTy {
   const uintptr_t HstPtrBase; // host info.
@@ -60,8 +78,7 @@ struct HostDataToTargetTy {
 
   struct StatesTy {
     StatesTy(uint64_t DRC, uint64_t HRC)
-        : DynRefCount(DRC), HoldRefCount(HRC),
-          MayContainAttachedPointers(false) {}
+        : DynRefCount(DRC), HoldRefCount(HRC) {}
     /// The dynamic reference count is the standard reference count as of OpenMP
     /// 4.5.  The hold reference count is an OpenMP extension for the sake of
     /// OpenACC support.
@@ -80,17 +97,10 @@ struct HostDataToTargetTy {
     uint64_t DynRefCount;
     uint64_t HoldRefCount;
 
-    /// Boolean flag to remember if any subpart of the mapped region might be
-    /// an attached pointer.
-    bool MayContainAttachedPointers;
-
-    /// This mutex will be locked when data movement is issued. For targets that
-    /// doesn't support async data movement, this mutex can guarantee that after
-    /// it is released, memory region on the target is update to date. For
-    /// targets that support async data movement, this can guarantee that data
-    /// movement has been issued. This mutex *must* be locked right before
-    /// releasing the mapping table lock.
-    std::mutex UpdateMtx;
+    /// A map of shadow pointers associated with this entry, the keys are host
+    /// pointer addresses to identify stale entries.
+    llvm::SmallSet<ShadowPtrInfoTy, 2> ShadowPtrInfos;
+
     /// Pointer to the event corresponding to the data update of this map.
     /// Note: At present this event is created when the first data transfer from
     /// host to device is issued, and only being used for H2D. It is not used
@@ -222,16 +232,41 @@ struct HostDataToTargetTy {
     return ThisRefCount == 1;
   }
 
-  void setMayContainAttachedPointers() const {
-    States->MayContainAttachedPointers = true;
+  /// Add the shadow pointer info \p ShadowPtrInfo to this entry but only if the
+  /// the target ptr value was not already present in the existing set of shadow
+  /// pointers. Return true if something was added.
+  bool addShadowPointer(const ShadowPtrInfoTy &ShadowPtrInfo) const {
+    auto Pair = States->ShadowPtrInfos.insert(ShadowPtrInfo);
+    if (Pair.second)
+      return true;
+    // Check for a stale entry, if found, replace the old one.
+    if ((*Pair.first).TgtPtrVal == ShadowPtrInfo.TgtPtrVal)
+      return false;
+    States->ShadowPtrInfos.erase(ShadowPtrInfo);
+    return addShadowPointer(ShadowPtrInfo);
   }
-  bool getMayContainAttachedPointers() const {
-    return States->MayContainAttachedPointers;
+
+  /// Apply \p CB to all shadow pointers of this entry. Returns OFFLOAD_FAIL if
+  /// \p CB returned OFFLOAD_FAIL for any of them, otherwise this returns
+  /// OFFLOAD_SUCCESS. The entry is locked for this operation.
+  template <typename CBTy> int foreachShadowPointerInfo(CBTy CB) const {
+    for (auto &It : States->ShadowPtrInfos)
+      if (CB(It) == OFFLOAD_FAIL)
+        return OFFLOAD_FAIL;
+    return OFFLOAD_SUCCESS;
   }
 
-  void lock() const { States->UpdateMtx.lock(); }
+  /// Lock this entry for exclusive access. Ensure to get exclusive access to
+  /// HDTTMap first!
+  void lock() const { Mtx.lock(); }
+
+  /// Unlock this entry to allow other threads inspecting it.
+  void unlock() const { Mtx.unlock(); }
 
-  void unlock() const { States->UpdateMtx.unlock(); }
+private:
+  // Mutex that needs to be held before the entry is inspected or modified. The
+  // HDTTMap mutex needs to be held before trying to lock any HDTT Entry.
+  mutable std::mutex Mtx;
 };
 
 /// Wrapper around the HostDataToTargetTy to be used in the HDTT map. In
@@ -243,6 +278,7 @@ struct HostDataToTargetMapKeyTy {
   uintptr_t KeyValue;
 
   HostDataToTargetMapKeyTy(void *Key) : KeyValue(uintptr_t(Key)) {}
+  HostDataToTargetMapKeyTy(uintptr_t Key) : KeyValue(Key) {}
   HostDataToTargetMapKeyTy(HostDataToTargetTy *HDTT)
       : KeyValue(HDTT->HstPtrBegin), HDTT(HDTT) {}
   HostDataToTargetTy *HDTT;
@@ -260,49 +296,89 @@ inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
   return LHS.KeyValue < RHS.KeyValue;
 }
 
-struct LookupResult {
-  struct {
-    unsigned IsContained : 1;
-    unsigned ExtendsBefore : 1;
-    unsigned ExtendsAfter : 1;
-  } Flags;
-
-  /// The corresponding map table entry which is stable.
-  HostDataToTargetTy *Entry = nullptr;
-
-  LookupResult() : Flags({0, 0, 0}), Entry() {}
-};
-
 /// This struct will be returned by \p DeviceTy::getTargetPointer which provides
-/// more data than just a target pointer.
+/// more data than just a target pointer. A TargetPointerResultTy that has a non
+/// null Entry owns the entry. As long as the TargetPointerResultTy (TPR) exists
+/// the entry is locked. To give up ownership without destroying the TPR use the
+/// reset() function.
 struct TargetPointerResultTy {
-  struct {
+  struct FlagTy {
     /// If the map table entry is just created
     unsigned IsNewEntry : 1;
     /// If the pointer is actually a host pointer (when unified memory enabled)
     unsigned IsHostPointer : 1;
     /// If the pointer is present in the mapping table.
     unsigned IsPresent : 1;
-  } Flags = {0, 0, 0};
+    /// Flag indicating that this was the last user of the entry and the ref
+    /// count is now 0.
+    unsigned IsLast : 1;
+  } Flags = {0, 0, 0, 0};
+
+  TargetPointerResultTy(const TargetPointerResultTy &) = delete;
+  TargetPointerResultTy &operator=(const TargetPointerResultTy &TPR) = delete;
+  TargetPointerResultTy() {}
+
+  TargetPointerResultTy(FlagTy Flags, HostDataToTargetTy *Entry,
+                        void *TargetPointer)
+      : Flags(Flags), TargetPointer(TargetPointer), Entry(Entry) {
+    if (Entry)
+      Entry->lock();
+  }
+
+  TargetPointerResultTy(TargetPointerResultTy &&TPR)
+      : Flags(TPR.Flags), TargetPointer(TPR.TargetPointer), Entry(TPR.Entry) {
+    TPR.Entry = nullptr;
+  }
+
+  TargetPointerResultTy &operator=(TargetPointerResultTy &&TPR) {
+    if (&TPR != this) {
+      std::swap(Flags, TPR.Flags);
+      std::swap(Entry, TPR.Entry);
+      std::swap(TargetPointer, TPR.TargetPointer);
+    }
+    return *this;
+  }
+
+  ~TargetPointerResultTy() {
+    if (Entry)
+      Entry->unlock();
+  }
 
   bool isPresent() const { return Flags.IsPresent; }
 
   bool isHostPointer() const { return Flags.IsHostPointer; }
 
-  /// The corresponding map table entry which is stable.
-  HostDataToTargetTy *Entry = nullptr;
-
   /// The corresponding target pointer
   void *TargetPointer = nullptr;
+
+  HostDataToTargetTy *getEntry() const { return Entry; }
+  void setEntry(HostDataToTargetTy *HDTTT,
+                HostDataToTargetTy *OwnedTPR = nullptr) {
+    if (Entry)
+      Entry->unlock();
+    Entry = HDTTT;
+    if (Entry && Entry != OwnedTPR)
+      Entry->lock();
+  }
+
+  void reset() { *this = TargetPointerResultTy(); }
+
+private:
+  /// The corresponding map table entry which is stable.
+  HostDataToTargetTy *Entry = nullptr;
 };
 
-/// Map for shadow pointers
-struct ShadowPtrValTy {
-  void *HstPtrVal;
-  void *TgtPtrAddr;
-  void *TgtPtrVal;
+struct LookupResult {
+  struct {
+    unsigned IsContained : 1;
+    unsigned ExtendsBefore : 1;
+    unsigned ExtendsAfter : 1;
+  } Flags;
+
+  LookupResult() : Flags({0, 0, 0}), TPR() {}
+
+  TargetPointerResultTy TPR;
 };
-typedef std::map<void *, ShadowPtrValTy> ShadowPtrListTy;
 
 ///
 struct PendingCtorDtorListsTy {
@@ -336,9 +412,7 @@ struct DeviceTy {
 
   PendingCtorsDtorsPerLibrary PendingCtorsDtors;
 
-  ShadowPtrListTy ShadowPtrMap;
-
-  std::mutex PendingGlobalsMtx, ShadowMtx;
+  std::mutex PendingGlobalsMtx;
 
   DeviceTy(RTLInfoTy *RTL);
   // DeviceTy is not copyable
@@ -353,7 +427,8 @@ struct DeviceTy {
   /// Lookup the mapping of \p HstPtrBegin in \p HDTTMap. The accessor ensures
   /// exclusive access to the HDTT map.
   LookupResult lookupMapping(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
-                             int64_t Size);
+                             int64_t Size,
+                             HostDataToTargetTy *OwnedTPR = nullptr);
 
   /// Get the target pointer based on host pointer begin and base. If the
   /// mapping already exists, the target pointer will be returned directly. In
@@ -365,12 +440,13 @@ struct DeviceTy {
   /// - Data allocation failed;
   /// - The user tried to do an illegal mapping;
   /// - Data transfer issue fails.
-  TargetPointerResultTy
-  getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
-                   map_var_info_t HstPtrName, bool HasFlagTo,
-                   bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
-                   bool HasCloseModifier, bool HasPresentModifier,
-                   bool HasHoldModifier, AsyncInfoTy &AsyncInfo);
+  TargetPointerResultTy getTargetPointer(
+      HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase,
+      int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo,
+      bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
+      bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
+      AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr,
+      bool ReleaseHDTTMap = true);
 
   /// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor
   /// ensures exclusive access to the HDTT map.
@@ -388,10 +464,9 @@ struct DeviceTy {
   /// - \p FromDataEnd tracks the number of threads referencing the entry at
   /// targetDataEnd for delayed deletion purpose.
   [[nodiscard]] TargetPointerResultTy
-  getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
-                 bool UpdateRefCount, bool UseHoldRefCount, bool &IsHostPtr,
-                 bool MustContain = false, bool ForceDelete = false,
-                 bool FromDataEnd = false);
+  getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool UpdateRefCount,
+                 bool UseHoldRefCount, bool MustContain = false,
+                 bool ForceDelete = false, bool FromDataEnd = false);
 
   /// Remove the \p Entry from the data map. Expect the entry's total reference
   /// count to be zero and the caller thread to be the last one using it. \p
@@ -436,10 +511,12 @@ struct DeviceTy {
   // synchronous.
   // Copy data from host to device
   int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
-                     AsyncInfoTy &AsyncInfo);
+                     AsyncInfoTy &AsyncInfo,
+                     HostDataToTargetTy *Entry = nullptr);
   // Copy data from device back to host
   int32_t retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size,
-                       AsyncInfoTy &AsyncInfo);
+                       AsyncInfoTy &AsyncInfo,
+                       HostDataToTargetTy *Entry = nullptr);
   // Copy data from current device to destination device directly
   int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
                        int64_t Size, AsyncInfoTy &AsyncInfo);

diff  --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp
index 47c9d5e2f1eff..2dfe075b8e746 100644
--- a/openmp/libomptarget/src/api.cpp
+++ b/openmp/libomptarget/src/api.cpp
@@ -116,16 +116,13 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
   }
 
   DeviceTy &Device = *PM->Devices[DeviceNum];
-  bool IsLast; // not used
-  bool IsHostPtr;
   // omp_target_is_present tests whether a host pointer refers to storage that
   // is mapped to a given device. However, due to the lack of the storage size,
   // only check 1 byte. Cannot set size 0 which checks whether the pointer (zero
   // lengh array) is mapped instead of the referred storage.
-  TargetPointerResultTy TPR =
-      Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1, IsLast,
-                            /*UpdateRefCount=*/false,
-                            /*UseHoldRefCount=*/false, IsHostPtr);
+  TargetPointerResultTy TPR = Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1,
+                                                    /*UpdateRefCount=*/false,
+                                                    /*UseHoldRefCount=*/false);
   int Rc = TPR.isPresent();
   DP("Call to omp_target_is_present returns %d\n", Rc);
   return Rc;
@@ -360,13 +357,10 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) {
     return nullptr;
   }
 
-  bool IsLast = false;
-  bool IsHostPtr = false;
   auto &Device = *PM->Devices[DeviceNum];
-  TargetPointerResultTy TPR =
-      Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1, IsLast,
-                            /*UpdateRefCount=*/false,
-                            /*UseHoldRefCount=*/false, IsHostPtr);
+  TargetPointerResultTy TPR = Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1,
+                                                    /*UpdateRefCount=*/false,
+                                                    /*UseHoldRefCount=*/false);
   if (!TPR.isPresent()) {
     DP("Ptr " DPxMOD "is not present on device %d, returning nullptr.\n",
        DPxPTR(Ptr), DeviceNum);

diff  --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 1f5d5a23371db..09c8e808db463 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -19,6 +19,7 @@
 #include <climits>
 #include <cstdint>
 #include <cstdio>
+#include <mutex>
 #include <string>
 #include <thread>
 
@@ -51,8 +52,7 @@ int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
 
 DeviceTy::DeviceTy(RTLInfoTy *RTL)
     : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
-      HasPendingGlobals(false), PendingCtorsDtors(), ShadowPtrMap(),
-      PendingGlobalsMtx(), ShadowMtx() {}
+      HasPendingGlobals(false), PendingCtorsDtors(), PendingGlobalsMtx() {}
 
 DeviceTy::~DeviceTy() {
   if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
@@ -69,6 +69,7 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
   auto It = HDTTMap->find(HstPtrBegin);
   if (It != HDTTMap->end()) {
     HostDataToTargetTy &HDTT = *It->HDTT;
+    std::lock_guard<HostDataToTargetTy> LG(HDTT);
     // Mapping already exists
     bool IsValid = HDTT.HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
                    HDTT.TgtPtrBegin == (uintptr_t)TgtPtrBegin;
@@ -109,39 +110,41 @@ int DeviceTy::disassociatePtr(void *HstPtrBegin) {
   HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
 
   auto It = HDTTMap->find(HstPtrBegin);
-  if (It != HDTTMap->end()) {
-    HostDataToTargetTy &HDTT = *It->HDTT;
-    // Mapping exists
-    if (HDTT.getHoldRefCount()) {
-      // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657:
-      // "It is an error to call acc_unmap_data if the structured reference
-      // count for the pointer is not zero."
-      REPORT("Trying to disassociate a pointer with a non-zero hold reference "
-             "count\n");
-    } else if (HDTT.isDynRefCountInf()) {
-      DP("Association found, removing it\n");
-      void *Event = HDTT.getEvent();
-      delete &HDTT;
-      if (Event)
-        destroyEvent(Event);
-      HDTTMap->erase(It);
-
-      // Notify the plugin about the unmapped memory.
-      return notifyDataUnmapped(HstPtrBegin);
-    } else {
-      REPORT("Trying to disassociate a pointer which was not mapped via "
-             "omp_target_associate_ptr\n");
-    }
-  } else {
+  if (It == HDTTMap->end()) {
     REPORT("Association not found\n");
+    return OFFLOAD_FAIL;
+  }
+  // Mapping exists
+  HostDataToTargetTy &HDTT = *It->HDTT;
+  std::lock_guard<HostDataToTargetTy> LG(HDTT);
+
+  if (HDTT.getHoldRefCount()) {
+    // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657:
+    // "It is an error to call acc_unmap_data if the structured reference
+    // count for the pointer is not zero."
+    REPORT("Trying to disassociate a pointer with a non-zero hold reference "
+           "count\n");
+    return OFFLOAD_FAIL;
+  }
+
+  if (HDTT.isDynRefCountInf()) {
+    DP("Association found, removing it\n");
+    void *Event = HDTT.getEvent();
+    delete &HDTT;
+    if (Event)
+      destroyEvent(Event);
+    HDTTMap->erase(It);
+    return OFFLOAD_SUCCESS;
   }
 
-  // Mapping not found
+  REPORT("Trying to disassociate a pointer which was not mapped via "
+         "omp_target_associate_ptr\n");
   return OFFLOAD_FAIL;
 }
 
 LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap,
-                                     void *HstPtrBegin, int64_t Size) {
+                                     void *HstPtrBegin, int64_t Size,
+                                     HostDataToTargetTy *OwnedTPR) {
 
   uintptr_t HP = (uintptr_t)HstPtrBegin;
   LookupResult LR;
@@ -159,42 +162,43 @@ LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap,
     // upper_bound satisfies
     //   std::prev(upper)->HDTT.HstPtrBegin <= hp < upper->HDTT.HstPtrBegin
     if (Upper != HDTTMap->begin()) {
-      LR.Entry = std::prev(Upper)->HDTT;
-      auto &HT = *LR.Entry;
+      LR.TPR.setEntry(std::prev(Upper)->HDTT, OwnedTPR);
       // the left side of extended address range is satisified.
-      // hp >= HT.HstPtrBegin || hp >= HT.HstPtrBase
-      LR.Flags.IsContained = HP < HT.HstPtrEnd || HP < HT.HstPtrBase;
+      // hp >= LR.TPR.getEntry()->HstPtrBegin || hp >=
+      // LR.TPR.getEntry()->HstPtrBase
+      LR.Flags.IsContained = HP < LR.TPR.getEntry()->HstPtrEnd ||
+                             HP < LR.TPR.getEntry()->HstPtrBase;
     }
 
     if (!LR.Flags.IsContained && Upper != HDTTMap->end()) {
-      LR.Entry = Upper->HDTT;
-      auto &HT = *LR.Entry;
+      LR.TPR.setEntry(Upper->HDTT, OwnedTPR);
       // the right side of extended address range is satisified.
-      // hp < HT.HstPtrEnd || hp < HT.HstPtrBase
-      LR.Flags.IsContained = HP >= HT.HstPtrBase;
+      // hp < LR.TPR.getEntry()->HstPtrEnd || hp < LR.TPR.getEntry()->HstPtrBase
+      LR.Flags.IsContained = HP >= LR.TPR.getEntry()->HstPtrBase;
     }
   } else {
     // check the left bin
     if (Upper != HDTTMap->begin()) {
-      LR.Entry = std::prev(Upper)->HDTT;
-      auto &HT = *LR.Entry;
+      LR.TPR.setEntry(std::prev(Upper)->HDTT, OwnedTPR);
       // Is it contained?
-      LR.Flags.IsContained = HP >= HT.HstPtrBegin && HP < HT.HstPtrEnd &&
-                             (HP + Size) <= HT.HstPtrEnd;
+      LR.Flags.IsContained = HP >= LR.TPR.getEntry()->HstPtrBegin &&
+                             HP < LR.TPR.getEntry()->HstPtrEnd &&
+                             (HP + Size) <= LR.TPR.getEntry()->HstPtrEnd;
       // Does it extend beyond the mapped region?
-      LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd;
+      LR.Flags.ExtendsAfter = HP < LR.TPR.getEntry()->HstPtrEnd &&
+                              (HP + Size) > LR.TPR.getEntry()->HstPtrEnd;
     }
 
     // check the right bin
     if (!(LR.Flags.IsContained || LR.Flags.ExtendsAfter) &&
         Upper != HDTTMap->end()) {
-      LR.Entry = Upper->HDTT;
-      auto &HT = *LR.Entry;
+      LR.TPR.setEntry(Upper->HDTT, OwnedTPR);
       // Does it extend into an already mapped region?
-      LR.Flags.ExtendsBefore =
-          HP < HT.HstPtrBegin && (HP + Size) > HT.HstPtrBegin;
+      LR.Flags.ExtendsBefore = HP < LR.TPR.getEntry()->HstPtrBegin &&
+                               (HP + Size) > LR.TPR.getEntry()->HstPtrBegin;
       // Does it extend beyond the mapped region?
-      LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd;
+      LR.Flags.ExtendsAfter = HP < LR.TPR.getEntry()->HstPtrEnd &&
+                              (HP + Size) > LR.TPR.getEntry()->HstPtrEnd;
     }
 
     if (LR.Flags.ExtendsBefore) {
@@ -211,19 +215,19 @@ LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap,
 }
 
 TargetPointerResultTy DeviceTy::getTargetPointer(
-    void *HstPtrBegin, void *HstPtrBase, int64_t Size,
-    map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways,
+    HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase,
+    int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways,
     bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier,
-    bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo) {
-  HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
+    bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo,
+    HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) {
 
-  void *TargetPointer = nullptr;
-  bool IsHostPtr = false;
-  bool IsPresent = true;
-  bool IsNew = false;
+  LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size, OwnedTPR);
+  LR.TPR.Flags.IsPresent = true;
 
-  LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
-  auto *Entry = LR.Entry;
+  // Release the mapping table lock only after the entry is locked by
+  // attaching it to TPR. Once TPR is destroyed it will release the lock
+  // on entry. If it is returned the lock will move to the returned object.
+  // If LR.Entry is already owned/locked we avoid trying to lock it again.
 
   // Check if the pointer is contained.
   // If a variable is mapped to the device manually by the user - which would
@@ -231,38 +235,38 @@ TargetPointerResultTy DeviceTy::getTargetPointer(
   // device address is returned even under unified memory conditions.
   if (LR.Flags.IsContained ||
       ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit)) {
-    auto &HT = *LR.Entry;
     const char *RefCountAction;
     if (UpdateRefCount) {
       // After this, reference count >= 1. If the reference count was 0 but the
       // entry was still there we can reuse the data on the device and avoid a
       // new submission.
-      HT.incRefCount(HasHoldModifier);
+      LR.TPR.getEntry()->incRefCount(HasHoldModifier);
       RefCountAction = " (incremented)";
     } else {
       // It might have been allocated with the parent, but it's still new.
-      IsNew = HT.getTotalRefCount() == 1;
+      LR.TPR.Flags.IsNewEntry = LR.TPR.getEntry()->getTotalRefCount() == 1;
       RefCountAction = " (update suppressed)";
     }
     const char *DynRefCountAction = HasHoldModifier ? "" : RefCountAction;
     const char *HoldRefCountAction = HasHoldModifier ? RefCountAction : "";
-    uintptr_t Ptr = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
+    uintptr_t Ptr = LR.TPR.getEntry()->TgtPtrBegin +
+                    ((uintptr_t)HstPtrBegin - LR.TPR.getEntry()->HstPtrBegin);
     INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
          "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
          ", Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s, Name=%s\n",
          (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(Ptr),
-         Size, HT.dynRefCountToStr().c_str(), DynRefCountAction,
-         HT.holdRefCountToStr().c_str(), HoldRefCountAction,
+         Size, LR.TPR.getEntry()->dynRefCountToStr().c_str(), DynRefCountAction,
+         LR.TPR.getEntry()->holdRefCountToStr().c_str(), HoldRefCountAction,
          (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
-    TargetPointer = (void *)Ptr;
+    LR.TPR.TargetPointer = (void *)Ptr;
   } else if ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && !IsImplicit) {
     // Explicit extension of mapped data - not allowed.
     MESSAGE("explicit extension not allowed: host address specified is " DPxMOD
             " (%" PRId64
             " bytes), but device allocation maps to host at " DPxMOD
             " (%" PRId64 " bytes)",
-            DPxPTR(HstPtrBegin), Size, DPxPTR(Entry->HstPtrBegin),
-            Entry->HstPtrEnd - Entry->HstPtrBegin);
+            DPxPTR(HstPtrBegin), Size, DPxPTR(LR.TPR.getEntry()->HstPtrBegin),
+            LR.TPR.getEntry()->HstPtrEnd - LR.TPR.getEntry()->HstPtrBegin);
     if (HasPresentModifier)
       MESSAGE("device mapping required by 'present' map type modifier does not "
               "exist for host address " DPxMOD " (%" PRId64 " bytes)",
@@ -279,9 +283,9 @@ TargetPointerResultTy DeviceTy::getTargetPointer(
       DP("Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
          "memory\n",
          DPxPTR((uintptr_t)HstPtrBegin), Size);
-      IsPresent = false;
-      IsHostPtr = true;
-      TargetPointer = HstPtrBegin;
+      LR.TPR.Flags.IsPresent = false;
+      LR.TPR.Flags.IsHostPointer = true;
+      LR.TPR.TargetPointer = HstPtrBegin;
     }
   } else if (HasPresentModifier) {
     DP("Mapping required by 'present' map type modifier does not exist for "
@@ -292,22 +296,25 @@ TargetPointerResultTy DeviceTy::getTargetPointer(
             DPxPTR(HstPtrBegin), Size);
   } else if (Size) {
     // If it is not contained and Size > 0, we should create a new entry for it.
-    IsNew = true;
+    LR.TPR.Flags.IsNewEntry = true;
     uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin);
-    Entry = HDTTMap
-                ->emplace(new HostDataToTargetTy(
-                    (uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
-                    (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier,
-                    HstPtrName))
-                .first->HDTT;
+    // Release the mapping table lock only after the entry is locked by
+    // attaching it to TPR.
+    LR.TPR.setEntry(HDTTMap
+                        ->emplace(new HostDataToTargetTy(
+                            (uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
+                            (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier,
+                            HstPtrName))
+                        .first->HDTT);
     INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
          "Creating new map entry with HstPtrBase=" DPxMOD
          ", HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, "
          "DynRefCount=%s, HoldRefCount=%s, Name=%s\n",
          DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size,
-         Entry->dynRefCountToStr().c_str(), Entry->holdRefCountToStr().c_str(),
+         LR.TPR.getEntry()->dynRefCountToStr().c_str(),
+         LR.TPR.getEntry()->holdRefCountToStr().c_str(),
          (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
-    TargetPointer = (void *)Ptr;
+    LR.TPR.TargetPointer = (void *)Ptr;
 
     // Notify the plugin about the new mapping.
     if (notifyDataMapped(HstPtrBegin, Size))
@@ -316,42 +323,41 @@ TargetPointerResultTy DeviceTy::getTargetPointer(
               nullptr /* TargetPointer */};
   } else {
     // This entry is not present and we did not create a new entry for it.
-    IsPresent = false;
+    LR.TPR.Flags.IsPresent = false;
   }
 
-  // If the target pointer is valid, and we need to transfer data, issue the
-  // data transfer.
-  if (TargetPointer && !IsHostPtr && HasFlagTo && (IsNew || HasFlagAlways) &&
-      Size != 0) {
-    // Lock the entry before releasing the mapping table lock such that another
-    // thread that could issue data movement will get the right result.
-    std::lock_guard<decltype(*Entry)> LG(*Entry);
-    // Release the mapping table lock right after the entry is locked.
+  // All mapping table modifications have been made. If the user requested it we
+  // give up the lock.
+  if (ReleaseHDTTMap)
     HDTTMap.destroy();
 
+  // If the target pointer is valid, and we need to transfer data, issue the
+  // data transfer.
+  if (LR.TPR.TargetPointer && !LR.TPR.Flags.IsHostPointer && HasFlagTo &&
+      (LR.TPR.Flags.IsNewEntry || HasFlagAlways) && Size != 0) {
     DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size,
-       DPxPTR(HstPtrBegin), DPxPTR(TargetPointer));
+       DPxPTR(HstPtrBegin), DPxPTR(LR.TPR.TargetPointer));
 
-    int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo);
+    int Ret = submitData(LR.TPR.TargetPointer, HstPtrBegin, Size, AsyncInfo,
+                         LR.TPR.getEntry());
     if (Ret != OFFLOAD_SUCCESS) {
       REPORT("Copying data to device failed.\n");
       // We will also return nullptr if the data movement fails because that
       // pointer points to a corrupted memory region so it doesn't make any
       // sense to continue to use it.
-      TargetPointer = nullptr;
-    } else if (Entry->addEventIfNecessary(*this, AsyncInfo) != OFFLOAD_SUCCESS)
+      LR.TPR.TargetPointer = nullptr;
+    } else if (LR.TPR.getEntry()->addEventIfNecessary(*this, AsyncInfo) !=
+               OFFLOAD_SUCCESS)
       return {{false /* IsNewEntry */, false /* IsHostPointer */},
               nullptr /* Entry */,
               nullptr /* TargetPointer */};
   } else {
-    // Release the mapping table lock directly.
-    HDTTMap.destroy();
     // If not a host pointer and no present modifier, we need to wait for the
     // event if it exists.
     // Note: Entry might be nullptr because of zero length array section.
-    if (Entry && !IsHostPtr && !HasPresentModifier) {
-      std::lock_guard<decltype(*Entry)> LG(*Entry);
-      void *Event = Entry->getEvent();
+    if (LR.TPR.getEntry() && !LR.TPR.Flags.IsHostPointer &&
+        !HasPresentModifier) {
+      void *Event = LR.TPR.getEntry()->getEvent();
       if (Event) {
         int Ret = waitEvent(Event, AsyncInfo);
         if (Ret != OFFLOAD_SUCCESS) {
@@ -366,31 +372,28 @@ TargetPointerResultTy DeviceTy::getTargetPointer(
     }
   }
 
-  return {{IsNew, IsHostPtr, IsPresent}, Entry, TargetPointer};
+  return std::move(LR.TPR);
 }
 
 TargetPointerResultTy
-DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
-                         bool UpdateRefCount, bool UseHoldRefCount,
-                         bool &IsHostPtr, bool MustContain, bool ForceDelete,
-                         bool FromDataEnd) {
+DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool UpdateRefCount,
+                         bool UseHoldRefCount, bool MustContain,
+                         bool ForceDelete, bool FromDataEnd) {
   HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
 
-  void *TargetPointer = NULL;
-  bool IsNew = false;
-  bool IsPresent = true;
-  IsHostPtr = false;
-  IsLast = false;
   LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
 
+  LR.TPR.Flags.IsPresent = true;
+
   if (LR.Flags.IsContained ||
       (!MustContain && (LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter))) {
-    auto &HT = *LR.Entry;
-    IsLast = HT.decShouldRemove(UseHoldRefCount, ForceDelete);
+    LR.TPR.Flags.IsLast =
+        LR.TPR.getEntry()->decShouldRemove(UseHoldRefCount, ForceDelete);
 
     if (ForceDelete) {
-      HT.resetRefCount(UseHoldRefCount);
-      assert(IsLast == HT.decShouldRemove(UseHoldRefCount) &&
+      LR.TPR.getEntry()->resetRefCount(UseHoldRefCount);
+      assert(LR.TPR.Flags.IsLast ==
+                 LR.TPR.getEntry()->decShouldRemove(UseHoldRefCount) &&
              "expected correct IsLast prediction for reset");
     }
 
@@ -400,32 +403,34 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
     // for it. Thus, we must track every query on targetDataEnds to ensure only
     // the last thread that holds a reference to an entry actually deletes it.
     if (FromDataEnd)
-      HT.incDataEndThreadCount();
+      LR.TPR.getEntry()->incDataEndThreadCount();
 
     const char *RefCountAction;
     if (!UpdateRefCount) {
       RefCountAction = " (update suppressed)";
-    } else if (IsLast) {
-      HT.decRefCount(UseHoldRefCount);
-      assert(HT.getTotalRefCount() == 0 &&
+    } else if (LR.TPR.Flags.IsLast) {
+      LR.TPR.getEntry()->decRefCount(UseHoldRefCount);
+      assert(LR.TPR.getEntry()->getTotalRefCount() == 0 &&
              "Expected zero reference count when deletion is scheduled");
       if (ForceDelete)
         RefCountAction = " (reset, delayed deletion)";
       else
         RefCountAction = " (decremented, delayed deletion)";
     } else {
-      HT.decRefCount(UseHoldRefCount);
+      LR.TPR.getEntry()->decRefCount(UseHoldRefCount);
       RefCountAction = " (decremented)";
     }
     const char *DynRefCountAction = UseHoldRefCount ? "" : RefCountAction;
     const char *HoldRefCountAction = UseHoldRefCount ? RefCountAction : "";
-    uintptr_t TP = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
+    uintptr_t TP = LR.TPR.getEntry()->TgtPtrBegin +
+                   ((uintptr_t)HstPtrBegin - LR.TPR.getEntry()->HstPtrBegin);
     INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
          "Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
          "Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n",
-         DPxPTR(HstPtrBegin), DPxPTR(TP), Size, HT.dynRefCountToStr().c_str(),
-         DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction);
-    TargetPointer = (void *)TP;
+         DPxPTR(HstPtrBegin), DPxPTR(TP), Size,
+         LR.TPR.getEntry()->dynRefCountToStr().c_str(), DynRefCountAction,
+         LR.TPR.getEntry()->holdRefCountToStr().c_str(), HoldRefCountAction);
+    LR.TPR.TargetPointer = (void *)TP;
   } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
     // If the value isn't found in the mapping and unified shared memory
     // is on then it means we have stumbled upon a value which we need to
@@ -433,18 +438,18 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
     DP("Get HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
        "memory\n",
        DPxPTR((uintptr_t)HstPtrBegin), Size);
-    IsPresent = false;
-    IsHostPtr = true;
-    TargetPointer = HstPtrBegin;
+    LR.TPR.Flags.IsPresent = false;
+    LR.TPR.Flags.IsHostPointer = true;
+    LR.TPR.TargetPointer = HstPtrBegin;
   } else {
     // OpenMP Specification v5.2: if a matching list item is not found, the
     // pointer retains its original value as per firstprivate semantics.
-    IsPresent = false;
-    IsHostPtr = false;
-    TargetPointer = HstPtrBegin;
+    LR.TPR.Flags.IsPresent = false;
+    LR.TPR.Flags.IsHostPointer = false;
+    LR.TPR.TargetPointer = HstPtrBegin;
   }
 
-  return {{IsNew, IsHostPtr, IsPresent}, LR.Entry, TargetPointer};
+  return std::move(LR.TPR);
 }
 
 // Return the target pointer begin (where the data will be moved).
@@ -453,8 +458,8 @@ void *DeviceTy::getTgtPtrBegin(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
   uintptr_t HP = (uintptr_t)HstPtrBegin;
   LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
   if (LR.Flags.IsContained || LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) {
-    auto &HT = *LR.Entry;
-    uintptr_t TP = HT.TgtPtrBegin + (HP - HT.HstPtrBegin);
+    uintptr_t TP =
+        LR.TPR.getEntry()->TgtPtrBegin + (HP - LR.TPR.getEntry()->HstPtrBegin);
     return (void *)TP;
   }
 
@@ -550,20 +555,32 @@ int32_t DeviceTy::deleteData(void *TgtPtrBegin, int32_t Kind) {
   return RTL->data_delete(RTLDeviceID, TgtPtrBegin, Kind);
 }
 
+static void printCopyInfo(int DeviceId, bool H2D, void *SrcPtrBegin,
+                          void *DstPtrBegin, int64_t Size,
+                          HostDataToTargetTy *HT) {
+
+  INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceId,
+       "Copying data from %s to %s, %sPtr=" DPxMOD ", %sPtr=" DPxMOD
+       ", Size=%" PRId64 ", Name=%s\n",
+       H2D ? "host" : "device", H2D ? "device" : "host", H2D ? "Hst" : "Tgt",
+       DPxPTR(SrcPtrBegin), H2D ? "Tgt" : "Hst", DPxPTR(DstPtrBegin), Size,
+       (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
+                              : "unknown");
+}
+
 // Submit data to device
 int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
-                             AsyncInfoTy &AsyncInfo) {
+                             AsyncInfoTy &AsyncInfo,
+                             HostDataToTargetTy *Entry) {
   if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) {
-    HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
-    LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
-    auto *HT = &*LR.Entry;
-
-    INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID,
-         "Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD
-         ", Size=%" PRId64 ", Name=%s\n",
-         DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin), Size,
-         (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
-                                : "unknown");
+    HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(Entry);
+    LookupResult LR;
+    if (!Entry) {
+      LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
+      Entry = LR.TPR.getEntry();
+    }
+    printCopyInfo(DeviceID, /* H2D */ true, HstPtrBegin, TgtPtrBegin, Size,
+                  Entry);
   }
 
   if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
@@ -574,17 +591,17 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
 
 // Retrieve data from device
 int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
-                               int64_t Size, AsyncInfoTy &AsyncInfo) {
+                               int64_t Size, AsyncInfoTy &AsyncInfo,
+                               HostDataToTargetTy *Entry) {
   if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) {
-    HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
-    LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
-    auto *HT = &*LR.Entry;
-    INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID,
-         "Copying data from device to host, TgtPtr=" DPxMOD ", HstPtr=" DPxMOD
-         ", Size=%" PRId64 ", Name=%s\n",
-         DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin), Size,
-         (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
-                                : "unknown");
+    HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(Entry);
+    LookupResult LR;
+    if (!Entry) {
+      LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
+      Entry = LR.TPR.getEntry();
+    }
+    printCopyInfo(DeviceID, /* H2D */ false, TgtPtrBegin, HstPtrBegin, Size,
+                  Entry);
   }
 
   if (!RTL->data_retrieve_async || !RTL->synchronize)

diff  --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 2158b948bc9ec..69af6ac24a41d 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -633,6 +633,9 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
     // may be considered a hack, we could revise the scheme in the future.
     bool UpdateRef =
         !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(FromMapper && I == 0);
+
+    DeviceTy::HDTTMapAccessorTy HDTTMap =
+        Device.HostDataToTargetMap.getExclusiveAccessor();
     if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
       DP("Has a pointer entry: \n");
       // Base is address of pointer.
@@ -649,9 +652,11 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
       // PTR_AND_OBJ entry is handled below, and so the allocation might fail
       // when HasPresentModifier.
       PointerTpr = Device.getTargetPointer(
-          HstPtrBase, HstPtrBase, sizeof(void *), /*HstPtrName=*/nullptr,
+          HDTTMap, HstPtrBase, HstPtrBase, sizeof(void *),
+          /*HstPtrName=*/nullptr,
           /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef,
-          HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo);
+          HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo,
+          /* OwnedTPR */ nullptr, /* ReleaseHDTTMap */ false);
       PointerTgtPtrBegin = PointerTpr.TargetPointer;
       IsHostPtr = PointerTpr.Flags.IsHostPointer;
       if (!PointerTgtPtrBegin) {
@@ -675,10 +680,11 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
 
     const bool HasFlagTo = ArgTypes[I] & OMP_TGT_MAPTYPE_TO;
     const bool HasFlagAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
+    // Note that HDTTMap will be released in getTargetPointer.
     auto TPR = Device.getTargetPointer(
-        HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo, HasFlagAlways,
-        IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier,
-        HasHoldModifier, AsyncInfo);
+        HDTTMap, HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo,
+        HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier,
+        HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry());
     void *TgtPtrBegin = TPR.TargetPointer;
     IsHostPtr = TPR.Flags.IsHostPointer;
     // If data_size==0, then the argument could be a zero-length pointer to
@@ -701,55 +707,30 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
     }
 
     if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
-      // Check whether we need to update the pointer on the device
-      bool UpdateDevPtr = false;
 
       uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
       void *ExpectedTgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
 
-      Device.ShadowMtx.lock();
-      auto Entry = Device.ShadowPtrMap.find(PointerHstPtrBegin);
-      // If this pointer is not in the map we need to insert it. If the map
-      // contains a stale entry, we need to update it (e.g. if the pointee was
-      // deallocated and later on is reallocated at another device address). The
-      // latter scenario is the subject of LIT test env/base_ptr_ref_count.c. An
-      // entry is removed from ShadowPtrMap only when the PTR of a PTR_AND_OBJ
-      // pair is deallocated, not when the OBJ is deallocated. In
-      // env/base_ptr_ref_count.c the PTR is a global "declare target" pointer,
-      // so it stays in the map for the lifetime of the application. When the
-      // OBJ is deallocated and later on allocated again (at a 
diff erent device
-      // address), ShadowPtrMap still contains an entry for Pointer_HstPtrBegin
-      // which is stale, pointing to the old ExpectedTgtPtrBase of the OBJ.
-      if (Entry == Device.ShadowPtrMap.end() ||
-          Entry->second.TgtPtrVal != ExpectedTgtPtrBase) {
-        // create or update shadow pointers for this entry
-        Device.ShadowPtrMap[PointerHstPtrBegin] = {
-            HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase};
-        PointerTpr.Entry->setMayContainAttachedPointers();
-        UpdateDevPtr = true;
-      }
-
-      if (UpdateDevPtr) {
-        std::lock_guard<decltype(*PointerTpr.Entry)> LG(*PointerTpr.Entry);
-        Device.ShadowMtx.unlock();
-
+      if (PointerTpr.getEntry()->addShadowPointer(ShadowPtrInfoTy{
+              (void **)PointerHstPtrBegin, HstPtrBase,
+              (void **)PointerTgtPtrBegin, ExpectedTgtPtrBase})) {
         DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
            DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));
 
         void *&TgtPtrBase = AsyncInfo.getVoidPtrLocation();
         TgtPtrBase = ExpectedTgtPtrBase;
 
-        int Ret = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
-                                    sizeof(void *), AsyncInfo);
+        int Ret =
+            Device.submitData(PointerTgtPtrBegin, &TgtPtrBase, sizeof(void *),
+                              AsyncInfo, PointerTpr.getEntry());
         if (Ret != OFFLOAD_SUCCESS) {
           REPORT("Copying data to device failed.\n");
           return OFFLOAD_FAIL;
         }
-        if (PointerTpr.Entry->addEventIfNecessary(Device, AsyncInfo) !=
+        if (PointerTpr.getEntry()->addEventIfNecessary(Device, AsyncInfo) !=
             OFFLOAD_SUCCESS)
           return OFFLOAD_FAIL;
-      } else
-        Device.ShadowMtx.unlock();
+      }
     }
   }
 
@@ -777,51 +758,11 @@ struct PostProcessingInfo {
   TargetPointerResultTy TPR;
 
   PostProcessingInfo(void *HstPtr, int64_t Size, int64_t ArgType,
-                     int32_t ArgIndex, TargetPointerResultTy TPR)
+                     int32_t ArgIndex, TargetPointerResultTy &&TPR)
       : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType),
-        ArgIndex(ArgIndex), TPR(TPR) {}
+        ArgIndex(ArgIndex), TPR(std::move(TPR)) {}
 };
 
-/// Apply \p CB to the shadow map pointer entries in the range \p Begin, to
-/// \p Begin + \p Size. \p CB is called with a locked shadow pointer map and the
-/// passed iterator can be updated. If the callback returns OFFLOAD_FAIL the
-/// rest of the map is not checked anymore.
-template <typename CBTy>
-static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin,
-                                    uintptr_t Size,
-                                    const TargetPointerResultTy &TPR) {
-  // If we have an object that is too small to hold a pointer subobject, no need
-  // to do any checking.
-  if (Size < sizeof(void *))
-    return;
-
-  // If the map entry for the object was never marked as containing attached
-  // pointers, no need to do any checking.
-  if (!TPR.Entry || !TPR.Entry->getMayContainAttachedPointers())
-    return;
-
-  uintptr_t LB = (uintptr_t)Begin;
-  uintptr_t UB = LB + Size;
-  // Now we are looking into the shadow map so we need to lock it.
-  std::lock_guard<decltype(Device.ShadowMtx)> LG(Device.ShadowMtx);
-  for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin();
-       Itr != Device.ShadowPtrMap.end();) {
-    uintptr_t ShadowHstPtrAddr = (uintptr_t)Itr->first;
-
-    // An STL map is sorted on its keys; use this property
-    // to quickly determine when to break out of the loop.
-    if (ShadowHstPtrAddr < LB) {
-      ++Itr;
-      continue;
-    }
-    if (ShadowHstPtrAddr >= UB)
-      break;
-
-    if (CB(Itr) == OFFLOAD_FAIL)
-      break;
-  }
-}
-
 } // namespace
 
 /// Applies the necessary post-processing procedures to entries listed in \p
@@ -831,7 +772,7 @@ static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin,
 /// according to the successfulness of the operations.
 [[nodiscard]] static int
 postProcessingTargetDataEnd(DeviceTy *Device,
-                            SmallVector<PostProcessingInfo> EntriesInfo,
+                            SmallVector<PostProcessingInfo> &EntriesInfo,
                             bool FromMapper) {
   int Ret = OFFLOAD_SUCCESS;
   void *FromMapperBase = nullptr;
@@ -859,10 +800,16 @@ postProcessingTargetDataEnd(DeviceTy *Device,
     // not request (exclusive) access to the HDTT map if DelEntry is
     // not set.
     DeviceTy::HDTTMapAccessorTy HDTTMap =
-        Device->HostDataToTargetMap.getExclusiveAccessor(!DelEntry);
+        Device->HostDataToTargetMap.getExclusiveAccessor();
+
+    // We cannot use a lock guard because we may end up delete the mutex.
+    // We also explicitly unlocked the entry after it was put in the EntriesInfo
+    // so it can be reused.
+    TPR.getEntry()->lock();
+    auto *Entry = TPR.getEntry();
 
-    const bool IsNotLastUser = TPR.Entry->decDataEndThreadCount() != 0;
-    if (DelEntry && (TPR.Entry->getTotalRefCount() != 0 || IsNotLastUser)) {
+    const bool IsNotLastUser = Entry->decDataEndThreadCount() != 0;
+    if (DelEntry && (Entry->getTotalRefCount() != 0 || IsNotLastUser)) {
       // The thread is not in charge of deletion anymore. Give up access
       // to the HDTT map and unset the deletion flag.
       HDTTMap.destroy();
@@ -874,44 +821,35 @@ postProcessingTargetDataEnd(DeviceTy *Device,
     // shadow copies. If the struct is going to be deallocated, remove any
     // remaining shadow pointer entries for this struct.
     const bool HasFrom = ArgType & OMP_TGT_MAPTYPE_FROM;
-    auto CB = [&](ShadowPtrListTy::iterator &Itr) {
-      // If we copied the struct to the host, we need to restore the
-      // pointer.
-      if (HasFrom) {
-        void **ShadowHstPtrAddr = (void **)Itr->first;
-        *ShadowHstPtrAddr = Itr->second.HstPtrVal;
-        DP("Restoring original host pointer value " DPxMOD " for host "
-           "pointer " DPxMOD "\n",
-           DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
-      }
-      // If the struct is to be deallocated, remove the shadow entry.
-      if (DelEntry) {
-        DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first));
-        auto OldItr = Itr;
-        Itr++;
-        Device->ShadowPtrMap.erase(OldItr);
-      } else {
-        ++Itr;
-      }
-      return OFFLOAD_SUCCESS;
-    };
-    applyToShadowMapEntries(*Device, CB, HstPtrBegin, DataSize, TPR);
+    if (HasFrom) {
+      Entry->foreachShadowPointerInfo(
+          [&](const ShadowPtrInfoTy &ShadowPtr) {
+            *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal;
+            DP("Restoring original host pointer value " DPxMOD " for host "
+               "pointer " DPxMOD "\n",
+               DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr));
+            return OFFLOAD_SUCCESS;
+          });
+    }
+
+    // Give up the lock as we either don't need it anymore (e.g., done with
+    // TPR), or erase TPR.
+    TPR.setEntry(nullptr);
 
     if (!DelEntry || (FromMapperBase && FromMapperBase == HstPtrBegin))
       continue;
 
-    // If we are deleting the entry the DataMapMtx is locked and we own
-    // the entry.
-    Ret = Device->eraseMapEntry(HDTTMap, TPR.Entry, DataSize);
+    Ret = Device->eraseMapEntry(HDTTMap, Entry, DataSize);
     // Entry is already remove from the map, we can unlock it now.
     HDTTMap.destroy();
-    Ret |= Device->deallocTgtPtrAndEntry(TPR.Entry, DataSize);
+    Ret |= Device->deallocTgtPtrAndEntry(Entry, DataSize);
     if (Ret != OFFLOAD_SUCCESS) {
       REPORT("Deallocating data from device failed.\n");
       break;
     }
   }
 
+  delete &EntriesInfo;
   return Ret;
 }
 
@@ -921,7 +859,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
                   int64_t *ArgTypes, map_var_info_t *ArgNames,
                   void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {
   int Ret = OFFLOAD_SUCCESS;
-  SmallVector<PostProcessingInfo> PostProcessingPtrs;
+  auto *PostProcessingPtrs = new SmallVector<PostProcessingInfo>();
   void *FromMapperBase = nullptr;
   // process each input.
   for (int32_t I = ArgNum - 1; I >= 0; --I) {
@@ -972,7 +910,6 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
       }
     }
 
-    bool IsLast, IsHostPtr;
     bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT;
     bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
                       (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) &&
@@ -982,9 +919,9 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
     bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
 
     // If PTR_AND_OBJ, HstPtrBegin is address of pointee
-    TargetPointerResultTy TPR = Device.getTgtPtrBegin(
-        HstPtrBegin, DataSize, IsLast, UpdateRef, HasHoldModifier, IsHostPtr,
-        !IsImplicit, ForceDelete, /*FromDataEnd=*/true);
+    TargetPointerResultTy TPR =
+        Device.getTgtPtrBegin(HstPtrBegin, DataSize, UpdateRef, HasHoldModifier,
+                              !IsImplicit, ForceDelete, /*FromDataEnd=*/true);
     void *TgtPtrBegin = TPR.TargetPointer;
     if (!TPR.isPresent() && !TPR.isHostPointer() &&
         (DataSize || HasPresentModifier)) {
@@ -1014,7 +951,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
     } else {
       DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
          " - is%s last\n",
-         DataSize, DPxPTR(TgtPtrBegin), (IsLast ? "" : " not"));
+         DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsLast ? "" : " not"));
     }
 
     // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 351 L14-16:
@@ -1028,20 +965,21 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
     // Move data back to the host
     const bool HasAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
     const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM;
-    if (HasFrom && (HasAlways || IsLast) && !IsHostPtr && DataSize != 0) {
+    if (HasFrom && (HasAlways || TPR.Flags.IsLast) &&
+        !TPR.Flags.IsHostPointer && DataSize != 0) {
       DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
          DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
 
-      std::lock_guard<decltype(*TPR.Entry)> LG(*TPR.Entry);
       // Wait for any previous transfer if an event is present.
-      if (void *Event = TPR.Entry->getEvent()) {
+      if (void *Event = TPR.getEntry()->getEvent()) {
         if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
           REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event));
           return OFFLOAD_FAIL;
         }
       }
 
-      Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo);
+      Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo,
+                                TPR.getEntry());
       if (Ret != OFFLOAD_SUCCESS) {
         REPORT("Copying data from device failed.\n");
         return OFFLOAD_FAIL;
@@ -1052,26 +990,26 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
       // as the entry can be reused and the reuse might happen after the
       // copy-back was issued but before it completed. Since the reuse might
       // also copy-back a value we would race.
-      if (IsLast) {
-        if (TPR.Entry->addEventIfNecessary(Device, AsyncInfo) !=
+      if (TPR.Flags.IsLast) {
+        if (TPR.getEntry()->addEventIfNecessary(Device, AsyncInfo) !=
             OFFLOAD_SUCCESS)
           return OFFLOAD_FAIL;
       }
     }
 
     // Add pointer to the buffer for post-synchronize processing.
-    PostProcessingPtrs.emplace_back(HstPtrBegin, DataSize, ArgTypes[I], I, TPR);
+    PostProcessingPtrs->emplace_back(HstPtrBegin, DataSize, ArgTypes[I], I,
+                                     std::move(TPR));
+    PostProcessingPtrs->back().TPR.getEntry()->unlock();
   }
 
   // Add post-processing functions
   // TODO: We might want to remove `mutable` in the future by not changing the
   // captured variables somehow.
-  AsyncInfo.addPostProcessingFunction(
-      [=, Device = &Device,
-       PostProcessingPtrs = std::move(PostProcessingPtrs)]() mutable -> int {
-        return postProcessingTargetDataEnd(Device, PostProcessingPtrs,
-                                           FromMapperBase);
-      });
+  AsyncInfo.addPostProcessingFunction([=, Device = &Device]() mutable -> int {
+    return postProcessingTargetDataEnd(Device, *PostProcessingPtrs,
+                                       FromMapperBase);
+  });
 
   return Ret;
 }
@@ -1080,10 +1018,9 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
                                 void *HstPtrBegin, int64_t ArgSize,
                                 int64_t ArgType, AsyncInfoTy &AsyncInfo) {
   TIMESCOPE_WITH_IDENT(Loc);
-  bool IsLast, IsHostPtr;
-  TargetPointerResultTy TPR = Device.getTgtPtrBegin(
-      HstPtrBegin, ArgSize, IsLast, /*UpdateRefCount=*/false,
-      /*UseHoldRefCount=*/false, IsHostPtr, /*MustContain=*/true);
+  TargetPointerResultTy TPR =
+      Device.getTgtPtrBegin(HstPtrBegin, ArgSize, /*UpdateRefCount=*/false,
+                            /*UseHoldRefCount=*/false, /*MustContain=*/true);
   void *TgtPtrBegin = TPR.TargetPointer;
   if (!TPR.isPresent()) {
     DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
@@ -1096,16 +1033,48 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
     return OFFLOAD_SUCCESS;
   }
 
-  if (IsHostPtr) {
+  if (TPR.Flags.IsHostPointer) {
     DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
        DPxPTR(HstPtrBegin));
     return OFFLOAD_SUCCESS;
   }
 
+  if (ArgType & OMP_TGT_MAPTYPE_TO) {
+    DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
+       ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
+    int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo,
+                                TPR.getEntry());
+    if (Ret != OFFLOAD_SUCCESS) {
+      REPORT("Copying data to device failed.\n");
+      return OFFLOAD_FAIL;
+    }
+    if (TPR.getEntry()) {
+      int Ret = TPR.getEntry()->foreachShadowPointerInfo(
+          [&](const ShadowPtrInfoTy &ShadowPtr) {
+            DP("Restoring original target pointer value " DPxMOD " for target "
+               "pointer " DPxMOD "\n",
+               DPxPTR(ShadowPtr.TgtPtrVal), DPxPTR(ShadowPtr.TgtPtrAddr));
+            Ret = Device.submitData(ShadowPtr.TgtPtrAddr,
+                                    (void *)&ShadowPtr.TgtPtrVal,
+                                    sizeof(void *), AsyncInfo);
+            if (Ret != OFFLOAD_SUCCESS) {
+              REPORT("Copying data to device failed.\n");
+              return OFFLOAD_FAIL;
+            }
+            return OFFLOAD_SUCCESS;
+          });
+      if (Ret != OFFLOAD_SUCCESS) {
+        DP("Updating shadow map failed\n");
+        return Ret;
+      }
+    }
+  }
+
   if (ArgType & OMP_TGT_MAPTYPE_FROM) {
     DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
        ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
-    int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo);
+    int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo,
+                                  TPR.getEntry());
     if (Ret != OFFLOAD_SUCCESS) {
       REPORT("Copying data from device failed.\n");
       return OFFLOAD_FAIL;
@@ -1113,44 +1082,26 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
 
     // Wait for device-to-host memcopies for whole struct to complete,
     // before restoring the correct host pointer.
-    AsyncInfo.addPostProcessingFunction([=, Device = &Device]() -> int {
-      auto CB = [&](ShadowPtrListTy::iterator &Itr) {
-        void **ShadowHstPtrAddr = (void **)Itr->first;
-        *ShadowHstPtrAddr = Itr->second.HstPtrVal;
-        DP("Restoring original host pointer value " DPxMOD
-           " for host pointer " DPxMOD "\n",
-           DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
-        ++Itr;
+    if (auto *Entry = TPR.getEntry()) {
+      AsyncInfo.addPostProcessingFunction([=]() -> int {
+        int Ret = Entry->foreachShadowPointerInfo(
+            [&](const ShadowPtrInfoTy &ShadowPtr) {
+              *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal;
+              DP("Restoring original host pointer value " DPxMOD
+                 " for host pointer " DPxMOD "\n",
+                 DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr));
+              return OFFLOAD_SUCCESS;
+            });
+        Entry->unlock();
+        if (Ret != OFFLOAD_SUCCESS) {
+          DP("Updating shadow map failed\n");
+          return Ret;
+        }
         return OFFLOAD_SUCCESS;
-      };
-      applyToShadowMapEntries(*Device, CB, HstPtrBegin, ArgSize, TPR);
-
-      return OFFLOAD_SUCCESS;
-    });
-  }
-
-  if (ArgType & OMP_TGT_MAPTYPE_TO) {
-    DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
-       ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
-    int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo);
-    if (Ret != OFFLOAD_SUCCESS) {
-      REPORT("Copying data to device failed.\n");
-      return OFFLOAD_FAIL;
+      });
     }
-
-    auto CB = [&](ShadowPtrListTy::iterator &Itr) {
-      DP("Restoring original target pointer value " DPxMOD " for target "
-         "pointer " DPxMOD "\n",
-         DPxPTR(Itr->second.TgtPtrVal), DPxPTR(Itr->second.TgtPtrAddr));
-      Ret = Device.submitData(Itr->second.TgtPtrAddr, &Itr->second.TgtPtrVal,
-                              sizeof(void *), AsyncInfo);
-      if (Ret != OFFLOAD_SUCCESS)
-        REPORT("Copying data to device failed.\n");
-      ++Itr;
-      return Ret;
-    };
-    applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR);
   }
+
   return OFFLOAD_SUCCESS;
 }
 
@@ -1538,7 +1489,6 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
         void *HstPtrVal = Args[I];
         void *HstPtrBegin = ArgBases[I];
         void *HstPtrBase = Args[Idx];
-        bool IsLast, IsHostPtr; // IsLast is unused.
         void *TgtPtrBase =
             (void *)((intptr_t)TgtArgs[TgtIdx] + TgtOffsets[TgtIdx]);
         DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
@@ -1546,15 +1496,15 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
         void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
         void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation();
         TargetPointerResultTy TPR = Device.getTgtPtrBegin(
-            HstPtrVal, ArgSizes[I], IsLast, /*UpdateRefCount=*/false,
-            /*UseHoldRefCount=*/false, IsHostPtr);
+            HstPtrVal, ArgSizes[I], /*UpdateRefCount=*/false,
+            /*UseHoldRefCount=*/false);
         PointerTgtPtrBegin = TPR.TargetPointer;
         if (!TPR.isPresent()) {
           DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
              DPxPTR(HstPtrVal));
           continue;
         }
-        if (IsHostPtr) {
+        if (TPR.Flags.IsHostPointer) {
           DP("Unified memory is active, no need to map lambda captured"
              "variable (" DPxMOD ")\n",
              DPxPTR(HstPtrVal));
@@ -1563,7 +1513,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
         DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",
            DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));
         Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin,
-                                sizeof(void *), AsyncInfo);
+                                sizeof(void *), AsyncInfo, TPR.getEntry());
         if (Ret != OFFLOAD_SUCCESS) {
           REPORT("Copying data to device failed.\n");
           return OFFLOAD_FAIL;
@@ -1576,7 +1526,6 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
     void *TgtPtrBegin;
     map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
     ptr
diff _t TgtBaseOffset;
-    bool IsLast, IsHostPtr; // unused.
     TargetPointerResultTy TPR;
     if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) {
       DP("Forwarding first-private value " DPxMOD " to the target construct\n",
@@ -1603,9 +1552,9 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
     } else {
       if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
         HstPtrBase = *reinterpret_cast<void **>(HstPtrBase);
-      TPR = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
+      TPR = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I],
                                   /*UpdateRefCount=*/false,
-                                  /*UseHoldRefCount=*/false, IsHostPtr);
+                                  /*UseHoldRefCount=*/false);
       TgtPtrBegin = TPR.TargetPointer;
       TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
 #ifdef OMPTARGET_DEBUG


        


More information about the Openmp-commits mailing list