[Openmp-commits] [openmp] 8425bde - Revert "[OpenMP] Avoid costly shadow map traversals whenever possible"
Joseph Huber via Openmp-commits
openmp-commits at lists.llvm.org
Fri Dec 10 12:58:16 PST 2021
Author: Joseph Huber
Date: 2021-12-10T15:57:58-05:00
New Revision: 8425bde82d0d96033ed74edbb94aba8e1dbee0c9
URL: https://github.com/llvm/llvm-project/commit/8425bde82d0d96033ed74edbb94aba8e1dbee0c9
DIFF: https://github.com/llvm/llvm-project/commit/8425bde82d0d96033ed74edbb94aba8e1dbee0c9.diff
LOG: Revert "[OpenMP] Avoid costly shadow map traversals whenever possible"
This reverts commit 7c8f4e7b85ed98497f37571d72609f39a8eed447.
Fails a few OpenMP tests, causes a few updates to segfault.
Added:
Modified:
openmp/libomptarget/src/api.cpp
openmp/libomptarget/src/device.cpp
openmp/libomptarget/src/device.h
openmp/libomptarget/src/omptarget.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp
index 5aaf5ad0ef7e4..47f30e69b1136 100644
--- a/openmp/libomptarget/src/api.cpp
+++ b/openmp/libomptarget/src/api.cpp
@@ -107,11 +107,10 @@ EXTERN int omp_target_is_present(const void *ptr, int device_num) {
DeviceTy &Device = *PM->Devices[device_num];
bool IsLast; // not used
bool IsHostPtr;
- TargetPointerResultTy TPR =
- Device.getTgtPtrBegin(const_cast<void *>(ptr), 0, IsLast,
- /*UpdateRefCount=*/false,
- /*UseHoldRefCount=*/false, IsHostPtr);
- int rc = (TPR.TargetPointer != NULL);
+ void *TgtPtr = Device.getTgtPtrBegin(const_cast<void *>(ptr), 0, IsLast,
+ /*UpdateRefCount=*/false,
+ /*UseHoldRefCount=*/false, IsHostPtr);
+ int rc = (TgtPtr != NULL);
// Under unified memory the host pointer can be returned by the
// getTgtPtrBegin() function which means that there is no device
// corresponding point for ptr. This function should return false
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 1e6b8825d5a27..90d9947068209 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -285,13 +285,12 @@ DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
// Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.
// Return the target pointer begin (where the data will be moved).
-// Decrement the reference counter if called from targetDataEnd. The data is
-// excepted to be mapped already, so the result will never be new.
-TargetPointerResultTy
-DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
- bool UpdateRefCount, bool UseHoldRefCount,
- bool &IsHostPtr, bool MustContain, bool ForceDelete) {
- void *TargetPointer = NULL;
+// Decrement the reference counter if called from targetDataEnd.
+void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
+ bool UpdateRefCount, bool UseHoldRefCount,
+ bool &IsHostPtr, bool MustContain,
+ bool ForceDelete) {
+ void *rc = NULL;
IsHostPtr = false;
IsLast = false;
DataMapMtx.lock();
@@ -333,7 +332,7 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
"Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n",
DPxPTR(HstPtrBegin), DPxPTR(tp), Size, HT.dynRefCountToStr().c_str(),
DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction);
- TargetPointer = (void *)tp;
+ rc = (void *)tp;
} else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
// If the value isn't found in the mapping and unified shared memory
// is on then it means we have stumbled upon a value which we need to
@@ -342,11 +341,11 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
"memory\n",
DPxPTR((uintptr_t)HstPtrBegin), Size);
IsHostPtr = true;
- TargetPointer = HstPtrBegin;
+ rc = HstPtrBegin;
}
DataMapMtx.unlock();
- return {{false, IsHostPtr}, lr.Entry, TargetPointer};
+ return rc;
}
// Return the target pointer begin (where the data will be moved).
diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h
index f41b690acbd24..75dde85a8806d 100644
--- a/openmp/libomptarget/src/device.h
+++ b/openmp/libomptarget/src/device.h
@@ -57,8 +57,7 @@ struct HostDataToTargetTy {
struct StatesTy {
StatesTy(uint64_t DRC, uint64_t HRC)
- : DynRefCount(DRC), HoldRefCount(HRC),
- MayContainAttachedPointers(false) {}
+ : DynRefCount(DRC), HoldRefCount(HRC) {}
/// The dynamic reference count is the standard reference count as of OpenMP
/// 4.5. The hold reference count is an OpenMP extension for the sake of
/// OpenACC support.
@@ -76,11 +75,6 @@ struct HostDataToTargetTy {
///
uint64_t DynRefCount;
uint64_t HoldRefCount;
-
- /// Boolean flag to remember if any subpart of the mapped region might be
- /// an attached pointer.
- bool MayContainAttachedPointers;
-
/// This mutex will be locked when data movement is issued. For targets that
/// doesn't support async data movement, this mutex can guarantee that after
/// it is released, memory region on the target is update to date. For
@@ -187,13 +181,6 @@ struct HostDataToTargetTy {
void lock() const { States->UpdateMtx.lock(); }
void unlock() const { States->UpdateMtx.unlock(); }
-
- void setMayContainAttachedPointers() const {
- States->MayContainAttachedPointers = true;
- }
- bool getMayContainAttachedPointers() const {
- return States->MayContainAttachedPointers;
- }
};
typedef uintptr_t HstPtrBeginTy;
@@ -303,11 +290,10 @@ struct DeviceTy {
bool HasCloseModifier, bool HasPresentModifier,
bool HasHoldModifier, AsyncInfoTy &AsyncInfo);
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size);
- TargetPointerResultTy getTgtPtrBegin(void *HstPtrBegin, int64_t Size,
- bool &IsLast, bool UpdateRefCount,
- bool UseHoldRefCount, bool &IsHostPtr,
- bool MustContain = false,
- bool ForceDelete = false);
+ void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
+ bool UpdateRefCount, bool UseHoldRefCount,
+ bool &IsHostPtr, bool MustContain = false,
+ bool ForceDelete = false);
/// For the map entry for \p HstPtrBegin, decrement the reference count
/// specified by \p HasHoldModifier and, if the the total reference count is
/// then zero, deallocate the corresponding device storage and remove the map
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index a08963c87eef9..3e9f6427b4724 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -17,7 +17,6 @@
#include "rtl.h"
#include <cassert>
-#include <cstdint>
#include <vector>
int AsyncInfoTy::synchronize() {
@@ -569,7 +568,6 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
// create or update shadow pointers for this entry
Device.ShadowPtrMap[Pointer_HstPtrBegin] = {
HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase};
- TPR.MapTableEntry->setMayContainAttachedPointers();
UpdateDevPtr = true;
}
@@ -613,48 +611,6 @@ struct DeallocTgtPtrInfo {
DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool HasHoldModifier)
: HstPtrBegin(HstPtr), DataSize(Size), HasHoldModifier(HasHoldModifier) {}
};
-
-/// Apply \p CB to the shadow map pointer entries in the range \p Begin, to
-/// \p Begin + \p Size. \p CB is called with a locked shadow pointer map and the
-/// passed iterator can be updated. If the callback returns OFFLOAD_FAIL the
-/// rest of the map is not checked anymore.
-template <typename CBTy>
-static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin,
- uintptr_t Size,
- const TargetPointerResultTy &TPR) {
- // If we have an object that is too small to hold a pointer subobject, no need
- // to do any checking.
- if (Size < sizeof(void *))
- return;
-
- // If the map entry for the object was never marked as containing attached
- // pointers, no need to do any checking.
- if (!TPR.MapTableEntry->getMayContainAttachedPointers())
- return;
-
- uintptr_t LB = (uintptr_t)Begin;
- uintptr_t UB = LB + Size;
- // Now we are looking into the shadow map so we need to lock it.
- Device.ShadowMtx.lock();
- for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin();
- Itr != Device.ShadowPtrMap.end();) {
- uintptr_t ShadowHstPtrAddr = (uintptr_t)Itr->first;
-
- // An STL map is sorted on its keys; use this property
- // to quickly determine when to break out of the loop.
- if (ShadowHstPtrAddr < LB) {
- ++Itr;
- continue;
- }
- if (ShadowHstPtrAddr >= UB)
- break;
-
- if (CB(Itr) == OFFLOAD_FAIL)
- break;
- }
- Device.ShadowMtx.unlock();
-}
-
} // namespace
/// Internal function to undo the mapping and retrieve the data from the device.
@@ -722,10 +678,9 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
// If PTR_AND_OBJ, HstPtrBegin is address of pointee
- TargetPointerResultTy TPR = Device.getTgtPtrBegin(
+ void *TgtPtrBegin = Device.getTgtPtrBegin(
HstPtrBegin, DataSize, IsLast, UpdateRef, HasHoldModifier, IsHostPtr,
!IsImplicit, ForceDelete);
- void *TgtPtrBegin = TPR.TargetPointer;
if (!TgtPtrBegin && (DataSize || HasPresentModifier)) {
DP("Mapping does not exist (%s)\n",
(HasPresentModifier ? "'present' map type modifier" : "ignored"));
@@ -798,10 +753,24 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
// need to restore the original host pointer values from their shadow
// copies. If the struct is going to be deallocated, remove any remaining
// shadow pointer entries for this struct.
- auto CB = [&](ShadowPtrListTy::iterator &Itr) {
+ uintptr_t LB = (uintptr_t)HstPtrBegin;
+ uintptr_t UB = (uintptr_t)HstPtrBegin + DataSize;
+ Device.ShadowMtx.lock();
+ for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin();
+ Itr != Device.ShadowPtrMap.end();) {
+ void **ShadowHstPtrAddr = (void **)Itr->first;
+
+ // An STL map is sorted on its keys; use this property
+ // to quickly determine when to break out of the loop.
+ if ((uintptr_t)ShadowHstPtrAddr < LB) {
+ ++Itr;
+ continue;
+ }
+ if ((uintptr_t)ShadowHstPtrAddr >= UB)
+ break;
+
// If we copied the struct to the host, we need to restore the pointer.
if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) {
- void **ShadowHstPtrAddr = (void **)Itr->first;
DP("Restoring original host pointer value " DPxMOD " for host "
"pointer " DPxMOD "\n",
DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
@@ -809,15 +778,13 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
}
// If the struct is to be deallocated, remove the shadow entry.
if (DelEntry) {
- DP("Removing shadow pointer " DPxMOD "\n",
- DPxPTR(Itr->second.HstPtrVal));
+ DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr));
Itr = Device.ShadowPtrMap.erase(Itr);
} else {
++Itr;
}
- return OFFLOAD_SUCCESS;
- };
- applyToShadowMapEntries(Device, CB, HstPtrBegin, DataSize, TPR);
+ }
+ Device.ShadowMtx.unlock();
// Add pointer to the buffer for later deallocation
if (DelEntry && !IsHostPtr)
@@ -853,10 +820,9 @@ static int targetDataContiguous(ident_t *loc, DeviceTy &Device, void *ArgsBase,
int64_t ArgType, AsyncInfoTy &AsyncInfo) {
TIMESCOPE_WITH_IDENT(loc);
bool IsLast, IsHostPtr;
- TargetPointerResultTy TPR = Device.getTgtPtrBegin(
+ void *TgtPtrBegin = Device.getTgtPtrBegin(
HstPtrBegin, ArgSize, IsLast, /*UpdateRefCount=*/false,
/*UseHoldRefCount=*/false, IsHostPtr, /*MustContain=*/true);
- void *TgtPtrBegin = TPR.TargetPointer;
if (!TgtPtrBegin) {
DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
if (ArgType & OMP_TGT_MAPTYPE_PRESENT) {
@@ -883,15 +849,22 @@ static int targetDataContiguous(ident_t *loc, DeviceTy &Device, void *ArgsBase,
return OFFLOAD_FAIL;
}
- auto CB = [&](ShadowPtrListTy::iterator &Itr) {
- void **ShadowHstPtrAddr = (void **)Itr->first;
+ uintptr_t LB = (uintptr_t)HstPtrBegin;
+ uintptr_t UB = (uintptr_t)HstPtrBegin + ArgSize;
+ Device.ShadowMtx.lock();
+ for (ShadowPtrListTy::iterator IT = Device.ShadowPtrMap.begin();
+ IT != Device.ShadowPtrMap.end(); ++IT) {
+ void **ShadowHstPtrAddr = (void **)IT->first;
+ if ((uintptr_t)ShadowHstPtrAddr < LB)
+ continue;
+ if ((uintptr_t)ShadowHstPtrAddr >= UB)
+ break;
DP("Restoring original host pointer value " DPxMOD
" for host pointer " DPxMOD "\n",
- DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
- *ShadowHstPtrAddr = Itr->second.HstPtrVal;
- return OFFLOAD_SUCCESS;
- };
- applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR);
+ DPxPTR(IT->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
+ *ShadowHstPtrAddr = IT->second.HstPtrVal;
+ }
+ Device.ShadowMtx.unlock();
}
if (ArgType & OMP_TGT_MAPTYPE_TO) {
@@ -903,17 +876,28 @@ static int targetDataContiguous(ident_t *loc, DeviceTy &Device, void *ArgsBase,
return OFFLOAD_FAIL;
}
- auto CB = [&](ShadowPtrListTy::iterator &Itr) {
+ uintptr_t LB = (uintptr_t)HstPtrBegin;
+ uintptr_t UB = (uintptr_t)HstPtrBegin + ArgSize;
+ Device.ShadowMtx.lock();
+ for (ShadowPtrListTy::iterator IT = Device.ShadowPtrMap.begin();
+ IT != Device.ShadowPtrMap.end(); ++IT) {
+ void **ShadowHstPtrAddr = (void **)IT->first;
+ if ((uintptr_t)ShadowHstPtrAddr < LB)
+ continue;
+ if ((uintptr_t)ShadowHstPtrAddr >= UB)
+ break;
DP("Restoring original target pointer value " DPxMOD " for target "
"pointer " DPxMOD "\n",
- DPxPTR(Itr->second.TgtPtrVal), DPxPTR(Itr->second.TgtPtrAddr));
- Ret = Device.submitData(Itr->second.TgtPtrAddr, &Itr->second.TgtPtrVal,
+ DPxPTR(IT->second.TgtPtrVal), DPxPTR(IT->second.TgtPtrAddr));
+ Ret = Device.submitData(IT->second.TgtPtrAddr, &IT->second.TgtPtrVal,
sizeof(void *), AsyncInfo);
- if (Ret != OFFLOAD_SUCCESS)
+ if (Ret != OFFLOAD_SUCCESS) {
REPORT("Copying data to device failed.\n");
- return Ret;
- };
- applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR);
+ Device.ShadowMtx.unlock();
+ return OFFLOAD_FAIL;
+ }
+ }
+ Device.ShadowMtx.unlock();
}
return OFFLOAD_SUCCESS;
}
@@ -1296,10 +1280,9 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation();
- TargetPointerResultTy TPR = Device.getTgtPtrBegin(
+ PointerTgtPtrBegin = Device.getTgtPtrBegin(
HstPtrVal, ArgSizes[I], IsLast, /*UpdateRefCount=*/false,
/*UseHoldRefCount=*/false, IsHostPtr);
- PointerTgtPtrBegin = TPR.TargetPointer;
if (!PointerTgtPtrBegin) {
DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
DPxPTR(HstPtrVal));
@@ -1328,7 +1311,6 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
ptr
diff _t TgtBaseOffset;
bool IsLast, IsHostPtr; // unused.
- TargetPointerResultTy TPR;
if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) {
DP("Forwarding first-private value " DPxMOD " to the target construct\n",
DPxPTR(HstPtrBase));
@@ -1354,10 +1336,9 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
} else {
if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
HstPtrBase = *reinterpret_cast<void **>(HstPtrBase);
- TPR = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
- /*UpdateRefCount=*/false,
- /*UseHoldRefCount=*/false, IsHostPtr);
- TgtPtrBegin = TPR.TargetPointer;
+ TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
+ /*UpdateRefCount=*/false,
+ /*UseHoldRefCount=*/false, IsHostPtr);
TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
#ifdef OMPTARGET_DEBUG
void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
More information about the Openmp-commits
mailing list