[Openmp-commits] [openmp] 18ce3d3 - [OpenMP][Offloading] Fix data race in data mapping by using two locks
Shilei Tian via Openmp-commits
openmp-commits at lists.llvm.org
Fri Jul 23 13:10:56 PDT 2021
Author: Shilei Tian
Date: 2021-07-23T16:10:51-04:00
New Revision: 18ce3d3f2c362b7fda33ebd7b4d544e9cae23ad4
URL: https://github.com/llvm/llvm-project/commit/18ce3d3f2c362b7fda33ebd7b4d544e9cae23ad4
DIFF: https://github.com/llvm/llvm-project/commit/18ce3d3f2c362b7fda33ebd7b4d544e9cae23ad4.diff
LOG: [OpenMP][Offloading] Fix data race in data mapping by using two locks
This patch tries to partially fix one of the two data race issues reported in
[1] by introducing a per-entry mutex. Additional discussion can also be found in
D104418, which will also be refined to fix another data race problem.
Here is how it works. Like before, `DataMapMtx` is still being used for mapping
table lookup and update. In any case, we will get a table entry. If we need to
make a data transfer (update the data on the device), we need to lock the entry
right before releasing `DataMapMtx`, and the issue of data transfer should be
after releasing `DataMapMtx`, and the entry is unlocked afterwards. This can
guarantee that: 1) issue of data movement is not in critical region, which will
not affect performance too much, and also will not affect other threads that don't
touch the same entry; 2) if another thread accesses the same entry, the state of
data movement is consistent (which requires that a thread must first get the
update lock before getting data movement information).
For a target that doesn't support async data transfer, issue of data movement is
data transfer. This two-lock design can potentially improve concurrency compared
with the design that guards data movement with `DataMapMtx` as well. For a target
that supports async data movement, we could simply attach the event between the
issue of data movement and unlock the entry. For a thread that wants to get the
event, it must first get the lock. This can also get rid of the busy wait until
the event pointer is valid.
Reference:
[1] https://bugs.llvm.org/show_bug.cgi?id=49940
Reviewed By: grokos
Differential Revision: https://reviews.llvm.org/D104555
Added:
Modified:
openmp/libomptarget/src/device.cpp
openmp/libomptarget/src/device.h
openmp/libomptarget/src/omptarget.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 33d16fb5d5484..fbbb3970f3e47 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -165,26 +165,18 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
return lr;
}
-// Used by targetDataBegin
-// Return a struct containing target pointer begin (where the data will be
-// moved).
-// Allocate memory if this is the first occurrence of this mapping.
-// Increment the reference counter.
-// If the target pointer is NULL, then either data allocation failed or the user
-// tried to do an illegal mapping.
-// The returned struct also returns an iterator to the map table entry
-// corresponding to the host pointer (if exists), and two flags indicating
-// whether the entry is just created, and if the target pointer included is
-// actually a host pointer (when unified memory enabled).
TargetPointerResultTy
-DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
- map_var_info_t HstPtrName, bool IsImplicit,
- bool UpdateRefCount, bool HasCloseModifier,
- bool HasPresentModifier) {
- void *TargetPointer = NULL;
- bool IsNew = false;
+DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
+ map_var_info_t HstPtrName, MoveDataStateTy MoveData,
+ bool IsImplicit, bool UpdateRefCount,
+ bool HasCloseModifier, bool HasPresentModifier,
+ AsyncInfoTy &AsyncInfo) {
+ void *TargetPointer = nullptr;
bool IsHostPtr = false;
+ bool IsNew = false;
+
DataMapMtx.lock();
+
LookupResult LR = lookupMapping(HstPtrBegin, Size);
auto Entry = LR.Entry;
@@ -263,7 +255,38 @@ DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
TargetPointer = (void *)Ptr;
}
- DataMapMtx.unlock();
+ if (IsNew && MoveData == MoveDataStateTy::UNKNOWN)
+ MoveData = MoveDataStateTy::REQUIRED;
+
+ // If the target pointer is valid, and we need to transfer data, issue the
+ // data transfer.
+ if (TargetPointer && (MoveData == MoveDataStateTy::REQUIRED)) {
+ // Lock the entry before releasing the mapping table lock such that another
+ // thread that could issue data movement will get the right result.
+ Entry->lock();
+ // Release the mapping table lock right after the entry is locked.
+ DataMapMtx.unlock();
+
+ DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size,
+ DPxPTR(HstPtrBegin), DPxPTR(TargetPointer));
+
+ int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo);
+
+ // Unlock the entry immediately after the data movement is issued.
+ Entry->unlock();
+
+ if (Ret != OFFLOAD_SUCCESS) {
+ REPORT("Copying data to device failed.\n");
+ // We will also return nullptr if the data movement fails because that
+ // pointer points to a corrupted memory region so it doesn't make any
+ // sense to continue to use it.
+ TargetPointer = nullptr;
+ }
+ } else {
+ // Release the mapping table lock directly.
+ DataMapMtx.unlock();
+ }
+
return {{IsNew, IsHostPtr}, Entry, TargetPointer};
}
diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h
index dee4325822baf..b8f5ab2c7b8b4 100644
--- a/openmp/libomptarget/src/device.h
+++ b/openmp/libomptarget/src/device.h
@@ -53,12 +53,20 @@ struct HostDataToTargetTy {
/// use mutable to allow modification via std::set iterator which is const.
mutable uint64_t RefCount;
static const uint64_t INFRefCount = ~(uint64_t)0;
+ /// This mutex will be locked when data movement is issued. For targets that
+ /// doesn't support async data movement, this mutex can guarantee that after
+ /// it is released, memory region on the target is update to date. For targets
+ /// that support async data movement, this can guarantee that data movement
+ /// has been issued. This mutex *must* be locked right before releasing the
+ /// mapping table lock.
+ std::shared_ptr<std::mutex> UpdateMtx;
public:
HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB,
map_var_info_t Name = nullptr, bool IsINF = false)
: HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name),
- TgtPtrBegin(TB), RefCount(IsINF ? INFRefCount : 1) {}
+ TgtPtrBegin(TB), RefCount(IsINF ? INFRefCount : 1),
+ UpdateMtx(std::make_shared<std::mutex>()) {}
uint64_t getRefCount() const { return RefCount; }
@@ -100,6 +108,10 @@ struct HostDataToTargetTy {
return !isRefCountInf();
return getRefCount() == 1;
}
+
+ void lock() const { UpdateMtx->lock(); }
+
+ void unlock() const { UpdateMtx->unlock(); }
};
typedef uintptr_t HstPtrBeginTy;
@@ -161,6 +173,8 @@ struct PendingCtorDtorListsTy {
typedef std::map<__tgt_bin_desc *, PendingCtorDtorListsTy>
PendingCtorsDtorsPerLibrary;
+enum class MoveDataStateTy : uint32_t { REQUIRED, NONE, UNKNOWN };
+
struct DeviceTy {
int32_t DeviceID;
RTLInfoTy *RTL;
@@ -195,12 +209,21 @@ struct DeviceTy {
bool isDataExchangable(const DeviceTy &DstDevice);
LookupResult lookupMapping(void *HstPtrBegin, int64_t Size);
- TargetPointerResultTy getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
- int64_t Size,
- map_var_info_t HstPtrName,
- bool IsImplicit, bool UpdateRefCount,
- bool HasCloseModifier,
- bool HasPresentModifier);
+ /// Get the target pointer based on host pointer begin and base. If the
+ /// mapping already exists, the target pointer will be returned directly. In
+ /// addition, if \p MoveData is true, the memory region pointed by \p
+ /// HstPtrBegin of size \p Size will also be transferred to the device. If the
+ /// mapping doesn't exist, and if unified memory is not enabled, a new mapping
+ /// will be created and the data will also be transferred accordingly. nullptr
+ /// will be returned because of any of following reasons:
+ /// - Data allocation failed;
+ /// - The user tried to do an illegal mapping;
+ /// - Data transfer issue fails.
+ TargetPointerResultTy
+ getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
+ map_var_info_t HstPtrName, MoveDataStateTy MoveData,
+ bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier,
+ bool HasPresentModifier, AsyncInfoTy &AsyncInfo);
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size);
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
bool UpdateRefCount, bool &IsHostPtr,
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 3b1c5a8742f4c..9cfd5cda19157 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -487,9 +487,10 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
// entry for a global that might not already be allocated by the time the
// PTR_AND_OBJ entry is handled below, and so the allocation might fail
// when HasPresentModifier.
- Pointer_TPR = Device.getOrAllocTgtPtr(
- HstPtrBase, HstPtrBase, sizeof(void *), nullptr, IsImplicit,
- UpdateRef, HasCloseModifier, HasPresentModifier);
+ Pointer_TPR = Device.getTargetPointer(
+ HstPtrBase, HstPtrBase, sizeof(void *), nullptr,
+ MoveDataStateTy::NONE, IsImplicit, UpdateRef, HasCloseModifier,
+ HasPresentModifier, AsyncInfo);
PointerTgtPtrBegin = Pointer_TPR.TargetPointer;
IsHostPtr = Pointer_TPR.Flags.IsHostPointer;
if (!PointerTgtPtrBegin) {
@@ -511,9 +512,17 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
(!FromMapper || i != 0); // subsequently update ref count of pointee
}
- auto TPR = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, data_size,
- HstPtrName, IsImplicit, UpdateRef,
- HasCloseModifier, HasPresentModifier);
+ MoveDataStateTy MoveData = MoveDataStateTy::NONE;
+ const bool UseUSM = PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
+ const bool HasFlagTo = arg_types[i] & OMP_TGT_MAPTYPE_TO;
+ const bool HasFlagAlways = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
+ if (HasFlagTo && (!UseUSM || HasCloseModifier))
+ MoveData = HasFlagAlways ? MoveDataStateTy::REQUIRED
+ : MoveData = MoveDataStateTy::UNKNOWN;
+
+ auto TPR = Device.getTargetPointer(
+ HstPtrBegin, HstPtrBase, data_size, HstPtrName, MoveData, IsImplicit,
+ UpdateRef, HasCloseModifier, HasPresentModifier, AsyncInfo);
void *TgtPtrBegin = TPR.TargetPointer;
IsHostPtr = TPR.Flags.IsHostPointer;
// If data_size==0, then the argument could be a zero-length pointer to
@@ -535,26 +544,6 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
args_base[i] = TgtPtrBase;
}
- if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
- bool copy = false;
- if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
- HasCloseModifier) {
- if (TPR.Flags.IsNewEntry || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS))
- copy = true;
- }
-
- if (copy && !IsHostPtr) {
- DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
- data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
- int rt =
- Device.submitData(TgtPtrBegin, HstPtrBegin, data_size, AsyncInfo);
- if (rt != OFFLOAD_SUCCESS) {
- REPORT("Copying data to device failed.\n");
- return OFFLOAD_FAIL;
- }
- }
- }
-
if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
// Check whether we need to update the pointer on the device
bool UpdateDevPtr = false;
@@ -582,20 +571,27 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase};
UpdateDevPtr = true;
}
- Device.ShadowMtx.unlock();
if (UpdateDevPtr) {
+ Pointer_TPR.MapTableEntry->lock();
+ Device.ShadowMtx.unlock();
+
DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));
+
void *&TgtPtrBase = AsyncInfo.getVoidPtrLocation();
TgtPtrBase = ExpectedTgtPtrBase;
+
int rt = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
sizeof(void *), AsyncInfo);
+ Pointer_TPR.MapTableEntry->unlock();
+
if (rt != OFFLOAD_SUCCESS) {
REPORT("Copying data to device failed.\n");
return OFFLOAD_FAIL;
}
- }
+ } else
+ Device.ShadowMtx.unlock();
}
}
More information about the Openmp-commits
mailing list