[Openmp-commits] [openmp] 6ca0346 - [OpenMP][libomptarget] Notify the plugins regarding new mapping/unmappings
Kevin Sala via Openmp-commits
openmp-commits at lists.llvm.org
Mon Feb 6 01:10:09 PST 2023
Author: Kevin Sala
Date: 2023-02-06T10:09:35+01:00
New Revision: 6ca034644d59dde3e00c1a41196bb9ba44bc1b17
URL: https://github.com/llvm/llvm-project/commit/6ca034644d59dde3e00c1a41196bb9ba44bc1b17
DIFF: https://github.com/llvm/llvm-project/commit/6ca034644d59dde3e00c1a41196bb9ba44bc1b17.diff
LOG: [OpenMP][libomptarget] Notify the plugins regarding new mapping/unmappings
The NextGen plugins use the information regarding new mapping/unmappings to
lock/unlock the corresponding host buffer and speed up the host-device memory
transfers involving those buffers. The locking/unlocking is disabled by default
and can be enabled by the LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS envar. The
envar accepts boolean values (on/off) and a special option:
- off: Do not lock mapped host buffers (default).
- on: Lock mapped host buffers automatically, but do not report lock
failures if the plugin fails to lock them.
- mandatory: Lock mapped host buffers automatically and treat locking failures
in the plugins as fatal errors. This option may be useful for
debugging purposes.
Differential Revision: https://reviews.llvm.org/D142514
Added:
Modified:
openmp/libomptarget/include/device.h
openmp/libomptarget/include/omptargetplugin.h
openmp/libomptarget/include/rtl.h
openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
openmp/libomptarget/src/device.cpp
openmp/libomptarget/src/omptarget.cpp
openmp/libomptarget/src/rtl.cpp
openmp/libomptarget/test/lit.cfg
Removed:
################################################################################
diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
index d4179857d3bd6..f1fbc287d017f 100644
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -444,6 +444,14 @@ struct DeviceTy {
int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
int64_t Size, AsyncInfoTy &AsyncInfo);
+ /// Notify the plugin about a new mapping starting at the host address
+ /// \p HstPtr and \p Size bytes.
+ int32_t notifyDataMapped(void *HstPtr, int64_t Size);
+
+ /// Notify the plugin about an existing mapping being unmapped starting at
+ /// the host address \p HstPtr.
+ int32_t notifyDataUnmapped(void *HstPtr);
+
// Launch the kernel identified by \p TgtEntryPtr with the given arguments.
int32_t launchKernel(void *TgtEntryPtr, void **TgtVarsPtr,
ptrdiff_t *TgtOffsets, const KernelArgsTy &KernelArgs,
diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h
index 04f2a79e089dd..3aa33528f9a85 100644
--- a/openmp/libomptarget/include/omptargetplugin.h
+++ b/openmp/libomptarget/include/omptargetplugin.h
@@ -209,6 +209,15 @@ int32_t __tgt_rtl_data_lock(int32_t ID, void *HstPtr, int64_t Size,
// unlock/unpin host memory
int32_t __tgt_rtl_data_unlock(int32_t ID, void *HstPtr);
+// Notify the plugin about a new mapping starting at the host address \p HstPtr
+// and \p Size bytes. The plugin may lock/pin that buffer to achieve optimal
+// memory transfers involving that buffer.
+int32_t __tgt_rtl_data_notify_mapped(int32_t ID, void *HstPtr, int64_t Size);
+
+// Notify the plugin about an existing mapping being unmapped, starting at the
+// host address \p HstPtr and \p Size bytes.
+int32_t __tgt_rtl_data_notify_unmapped(int32_t ID, void *HstPtr);
+
#ifdef __cplusplus
}
#endif
diff --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h
index 3d40aa574f406..145f05585a17d 100644
--- a/openmp/libomptarget/include/rtl.h
+++ b/openmp/libomptarget/include/rtl.h
@@ -70,6 +70,8 @@ struct RTLInfoTy {
const char **);
typedef int32_t(data_lock_ty)(int32_t, void *, int64_t, void **);
typedef int32_t(data_unlock_ty)(int32_t, void *);
+ typedef int32_t(data_notify_mapped_ty)(int32_t, void *, int64_t);
+ typedef int32_t(data_notify_unmapped_ty)(int32_t, void *);
int32_t Idx = -1; // RTL index, index is the number of devices
// of other RTLs that were registered before,
@@ -120,6 +122,8 @@ struct RTLInfoTy {
release_async_info_ty *release_async_info = nullptr;
data_lock_ty *data_lock = nullptr;
data_unlock_ty *data_unlock = nullptr;
+ data_notify_mapped_ty *data_notify_mapped = nullptr;
+ data_notify_unmapped_ty *data_notify_unmapped = nullptr;
// Are there images associated with this RTL.
bool IsUsed = false;
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 6f57e19b343b3..983265f6ef96f 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1844,6 +1844,38 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Plugin::check(Status, "Error in hsa_amd_memory_unlock: %s\n");
}
+ /// Check through the HSA runtime whether the \p HstPtr buffer is pinned.
+ Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr,
+ void *&BaseDevAccessiblePtr,
+ size_t &BaseSize) const override {
+ hsa_amd_pointer_info_t Info;
+ Info.size = sizeof(hsa_amd_pointer_info_t);
+
+ hsa_status_t Status =
+ hsa_amd_pointer_info(HstPtr, &Info, /* Allocator */ nullptr,
+ /* Number of accessible agents (out) */ nullptr,
+ /* Accessible agents */ nullptr);
+ if (auto Err = Plugin::check(Status, "Error in hsa_amd_pointer_info: %s"))
+ return Err;
+
+ // The buffer may be locked or allocated through HSA allocators. Assume that
+ // the buffer is host pinned if the runtime reports a HSA type.
+ if (Info.type != HSA_EXT_POINTER_TYPE_LOCKED &&
+ Info.type != HSA_EXT_POINTER_TYPE_HSA)
+ return false;
+
+ assert(Info.hostBaseAddress && "Invalid host pinned address");
+ assert(Info.agentBaseAddress && "Invalid agent pinned address");
+ assert(Info.sizeInBytes > 0 && "Invalid pinned allocation size");
+
+ // Save the allocation info in the output parameters.
+ BaseHstPtr = Info.hostBaseAddress;
+ BaseDevAccessiblePtr = Info.agentBaseAddress;
+ BaseSize = Info.sizeInBytes;
+
+ return true;
+ }
+
/// Submit data to the device (host to device transfer).
Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size,
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index d1f5996f3b900..4a1803254d4a3 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -582,20 +582,68 @@ GenericDeviceTy::getExecutionModeForKernel(StringRef Name,
return ExecModeGlobal.getValue();
}
+Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr,
+ size_t Size, bool ExternallyLocked) {
+ // Insert the new entry into the map.
+ auto Res = Allocs.insert({HstPtr, DevAccessiblePtr, Size, ExternallyLocked});
+ if (!Res.second)
+ return Plugin::error("Cannot insert locked buffer entry");
+
+ // Check whether the next entry overlaps with the inserted entry.
+ auto It = std::next(Res.first);
+ if (It == Allocs.end())
+ return Plugin::success();
+
+ const EntryTy *NextEntry = &(*It);
+ if (intersects(NextEntry->HstPtr, NextEntry->Size, HstPtr, Size))
+ return Plugin::error("Partial overlapping not allowed in locked buffers");
+
+ return Plugin::success();
+}
+
+Error PinnedAllocationMapTy::eraseEntry(const EntryTy &Entry) {
+ // Erase the existing entry. Notice this requires an additional map lookup,
+ // but this should not be a performance issue. Using iterators would make
+ // the code more difficult to read.
+ size_t Erased = Allocs.erase({Entry.HstPtr});
+ if (!Erased)
+ return Plugin::error("Cannot erase locked buffer entry");
+ return Plugin::success();
+}
+
+Error PinnedAllocationMapTy::registerEntryUse(const EntryTy &Entry,
+ void *HstPtr, size_t Size) {
+ if (!contains(Entry.HstPtr, Entry.Size, HstPtr, Size))
+ return Plugin::error("Partial overlapping not allowed in locked buffers");
+
+ ++Entry.References;
+ return Plugin::success();
+}
+
+Expected<bool> PinnedAllocationMapTy::unregisterEntryUse(const EntryTy &Entry) {
+ if (Entry.References == 0)
+ return Plugin::error("Invalid number of references");
+
+ // Return whether this was the last user.
+ return (--Entry.References == 0);
+}
+
Error PinnedAllocationMapTy::registerHostBuffer(void *HstPtr,
void *DevAccessiblePtr,
size_t Size) {
assert(HstPtr && "Invalid pointer");
assert(DevAccessiblePtr && "Invalid pointer");
+ assert(Size && "Invalid size");
std::lock_guard<std::shared_mutex> Lock(Mutex);
// No pinned allocation should intersect.
- auto Res = Allocs.insert({HstPtr, DevAccessiblePtr, Size});
- if (!Res.second)
- return Plugin::error("Cannot register locked buffer");
+ const EntryTy *Entry = findIntersecting(HstPtr);
+ if (Entry)
+ return Plugin::error("Cannot insert entry due to an existing one");
- return Plugin::success();
+ // Now insert the new entry.
+ return insertEntry(HstPtr, DevAccessiblePtr, Size);
}
Error PinnedAllocationMapTy::unregisterHostBuffer(void *HstPtr) {
@@ -603,62 +651,58 @@ Error PinnedAllocationMapTy::unregisterHostBuffer(void *HstPtr) {
std::lock_guard<std::shared_mutex> Lock(Mutex);
- // Find the pinned allocation starting at the host pointer address.
- auto It = Allocs.find({HstPtr});
- if (It == Allocs.end())
+ const EntryTy *Entry = findIntersecting(HstPtr);
+ if (!Entry)
return Plugin::error("Cannot find locked buffer");
- const EntryTy &Entry = *It;
+ // The address in the entry should be the same we are unregistering.
+ if (Entry->HstPtr != HstPtr)
+ return Plugin::error("Unexpected host pointer in locked buffer entry");
+
+ // Unregister from the entry.
+ auto LastUseOrErr = unregisterEntryUse(*Entry);
+ if (!LastUseOrErr)
+ return LastUseOrErr.takeError();
// There should be no other references to the pinned allocation.
- if (Entry.References > 1)
+ if (!(*LastUseOrErr))
return Plugin::error("The locked buffer is still being used");
- // Remove the entry from the map.
- Allocs.erase(It);
-
- return Plugin::success();
+ // Erase the entry from the map.
+ return eraseEntry(*Entry);
}
Expected<void *> PinnedAllocationMapTy::lockHostBuffer(void *HstPtr,
size_t Size) {
assert(HstPtr && "Invalid pointer");
+ assert(Size && "Invalid size");
std::lock_guard<std::shared_mutex> Lock(Mutex);
- auto It = findIntersecting(HstPtr);
-
- // No intersecting registered allocation found in the map. We must lock and
- // register the memory buffer into the map.
- if (It == Allocs.end()) {
- // First, lock the host buffer and retrieve the device accessible pointer.
- auto PinnedPtrOrErr = Device.dataLockImpl(HstPtr, Size);
- if (!PinnedPtrOrErr)
- return PinnedPtrOrErr.takeError();
+ const EntryTy *Entry = findIntersecting(HstPtr);
- // Then, insert the host buffer entry into the map.
- auto Res = Allocs.insert({HstPtr, *PinnedPtrOrErr, Size});
- if (!Res.second)
- return Plugin::error("Cannot register locked buffer");
+ if (Entry) {
+ // An already registered intersecting buffer was found. Register a new use.
+ if (auto Err = registerEntryUse(*Entry, HstPtr, Size))
+ return Err;
- // Return the device accessible pointer.
- return *PinnedPtrOrErr;
+ // Return the device accessible pointer with the correct offset.
+ return advanceVoidPtr(Entry->DevAccessiblePtr,
+ getPtrDiff(HstPtr, Entry->HstPtr));
}
- const EntryTy &Entry = *It;
+ // No intersecting registered allocation found in the map. First, lock the
+ // host buffer and retrieve the device accessible pointer.
+ auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size);
+ if (!DevAccessiblePtrOrErr)
+ return DevAccessiblePtrOrErr.takeError();
-#ifdef OMPTARGET_DEBUG
- // Do not allow partial overlapping among host pinned buffers.
- if (advanceVoidPtr(HstPtr, Size) > advanceVoidPtr(Entry.HstPtr, Entry.Size))
- return Plugin::error("Partial overlapping not allowed in locked memory");
-#endif
-
- // Increase the number of references.
- Entry.References++;
+ // Now insert the new entry into the map.
+ if (auto Err = insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size))
+ return Err;
- // Return the device accessible pointer after applying the correct offset.
- return advanceVoidPtr(Entry.DevAccessiblePtr,
- getPtrDiff(HstPtr, Entry.HstPtr));
+ // Return the device accessible pointer.
+ return *DevAccessiblePtrOrErr;
}
Error PinnedAllocationMapTy::unlockHostBuffer(void *HstPtr) {
@@ -666,28 +710,113 @@ Error PinnedAllocationMapTy::unlockHostBuffer(void *HstPtr) {
std::lock_guard<std::shared_mutex> Lock(Mutex);
- auto It = findIntersecting(HstPtr);
- if (It == Allocs.end())
+ const EntryTy *Entry = findIntersecting(HstPtr);
+ if (!Entry)
return Plugin::error("Cannot find locked buffer");
- const EntryTy &Entry = *It;
-
- // Decrease the number of references. No need to do anything if there are
+ // Unregister from the locked buffer. No need to do anything if there are
// others using the allocation.
- if (--Entry.References > 0)
+ auto LastUseOrErr = unregisterEntryUse(*Entry);
+ if (!LastUseOrErr)
+ return LastUseOrErr.takeError();
+
+ // No need to do anything if there are others using the allocation.
+ if (!(*LastUseOrErr))
return Plugin::success();
- // This was the last user of the allocation. Unlock the original locked memory
- // buffer, which is the host pointer stored in the entry.
- if (auto Err = Device.dataUnlockImpl(Entry.HstPtr))
- return Err;
+ // This was the last user of the allocation. Unlock the original locked buffer
+ // if it was locked by the plugin. Do not unlock it if it was locked by an
+ // external entity. Unlock the buffer using the host pointer of the entry.
+ if (!Entry->ExternallyLocked)
+ if (auto Err = Device.dataUnlockImpl(Entry->HstPtr))
+ return Err;
- // Remove the entry from the map.
- size_t Erased = Allocs.erase(Entry);
- if (!Erased)
- return Plugin::error("Cannot find locked buffer");
+ // Erase the entry from the map.
+ return eraseEntry(*Entry);
+}
- return Plugin::success();
+Error PinnedAllocationMapTy::lockMappedHostBuffer(void *HstPtr, size_t Size) {
+ assert(HstPtr && "Invalid pointer");
+ assert(Size && "Invalid size");
+
+ std::lock_guard<std::shared_mutex> Lock(Mutex);
+
+ // If previously registered, just register a new user on the entry.
+ const EntryTy *Entry = findIntersecting(HstPtr);
+ if (Entry)
+ return registerEntryUse(*Entry, HstPtr, Size);
+
+ size_t BaseSize;
+ void *BaseHstPtr, *BaseDevAccessiblePtr;
+
+ // Check if it was externally pinned by a vendor-specific API.
+ auto IsPinnedOrErr = Device.isPinnedPtrImpl(HstPtr, BaseHstPtr,
+ BaseDevAccessiblePtr, BaseSize);
+ if (!IsPinnedOrErr)
+ return IsPinnedOrErr.takeError();
+
+ // If pinned, just insert the entry representing the whole pinned buffer.
+ if (*IsPinnedOrErr)
+ return insertEntry(BaseHstPtr, BaseDevAccessiblePtr, BaseSize,
+ /* Externally locked */ true);
+
+ // Not externally pinned. Do nothing if locking of mapped buffers is disabled.
+ if (!LockMappedBuffers)
+ return Plugin::success();
+
+ // Otherwise, lock the buffer and insert the new entry.
+ auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size);
+ if (!DevAccessiblePtrOrErr) {
+ // Errors may be tolerated.
+ if (!IgnoreLockMappedFailures)
+ return DevAccessiblePtrOrErr.takeError();
+
+ consumeError(DevAccessiblePtrOrErr.takeError());
+ return Plugin::success();
+ }
+
+ return insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size);
+}
+
+Error PinnedAllocationMapTy::unlockUnmappedHostBuffer(void *HstPtr) {
+ assert(HstPtr && "Invalid pointer");
+
+ std::lock_guard<std::shared_mutex> Lock(Mutex);
+
+ // Check whether there is any intersecting entry.
+ const EntryTy *Entry = findIntersecting(HstPtr);
+
+ // No entry but automatic locking of mapped buffers is disabled, so
+ // nothing to do.
+ if (!Entry && !LockMappedBuffers)
+ return Plugin::success();
+
+ // No entry, automatic locking is enabled, but the locking may have failed, so
+ // do nothing.
+ if (!Entry && IgnoreLockMappedFailures)
+ return Plugin::success();
+
+ // No entry, but the automatic locking is enabled, so this is an error.
+ if (!Entry)
+ return Plugin::error("Locked buffer not found");
+
+ // There is entry, so unregister a user and check whether it was the last one.
+ auto LastUseOrErr = unregisterEntryUse(*Entry);
+ if (!LastUseOrErr)
+ return LastUseOrErr.takeError();
+
+ // If it is not the last one, there is nothing to do.
+ if (!(*LastUseOrErr))
+ return Plugin::success();
+
+ // Otherwise, if it was the last and the buffer was locked by the plugin,
+ // unlock it.
+ if (!Entry->ExternallyLocked)
+ if (auto Err = Device.dataUnlockImpl(Entry->HstPtr))
+ return Err;
+
+ // Finally erase the entry from the map.
+ return eraseEntry(*Entry);
}
Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) {
@@ -1116,6 +1245,29 @@ int32_t __tgt_rtl_data_unlock(int32_t DeviceId, void *Ptr) {
return OFFLOAD_SUCCESS;
}
+int32_t __tgt_rtl_data_notify_mapped(int32_t DeviceId, void *HstPtr,
+ int64_t Size) {
+ auto Err = Plugin::get().getDevice(DeviceId).notifyDataMapped(HstPtr, Size);
+ if (Err) {
+ REPORT("Failure to notify data mapped %p: %s\n", HstPtr,
+ toString(std::move(Err)).data());
+ return OFFLOAD_FAIL;
+ }
+
+ return OFFLOAD_SUCCESS;
+}
+
+int32_t __tgt_rtl_data_notify_unmapped(int32_t DeviceId, void *HstPtr) {
+ auto Err = Plugin::get().getDevice(DeviceId).notifyDataUnmapped(HstPtr);
+ if (Err) {
+ REPORT("Failure to notify data unmapped %p: %s\n", HstPtr,
+ toString(std::move(Err)).data());
+ return OFFLOAD_FAIL;
+ }
+
+ return OFFLOAD_SUCCESS;
+}
+
int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
int64_t Size) {
return __tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size,
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
index 11157e5d0f390..0f12f28304c28 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -269,20 +269,27 @@ class PinnedAllocationMapTy {
/// The size of the pinned allocation.
size_t Size;
+ /// Indicate whether the allocation was locked from outside the plugin, for
+ /// instance, from the application. The externally locked allocations are
+ /// not unlocked by the plugin when unregistering the last user.
+ bool ExternallyLocked;
+
/// The number of references to the pinned allocation. The allocation should
/// remain pinned and registered to the map until the number of references
/// becomes zero.
mutable size_t References;
- /// Create an entry with the host and device acessible pointers, and the
- /// buffer size.
- EntryTy(void *HstPtr, void *DevAccessiblePtr, size_t Size)
+ /// Create an entry with the host and device acessible pointers, the buffer
+ /// size, and a boolean indicating whether the buffer was locked externally.
+ EntryTy(void *HstPtr, void *DevAccessiblePtr, size_t Size,
+ bool ExternallyLocked)
: HstPtr(HstPtr), DevAccessiblePtr(DevAccessiblePtr), Size(Size),
- References(1) {}
+ ExternallyLocked(ExternallyLocked), References(1) {}
/// Utility constructor used for std::set searches.
EntryTy(void *HstPtr)
- : HstPtr(HstPtr), DevAccessiblePtr(nullptr), Size(0), References(0) {}
+ : HstPtr(HstPtr), DevAccessiblePtr(nullptr), Size(0),
+ ExternallyLocked(false), References(0) {}
};
/// Comparator of mep entries. Use the host pointer to enforce an order
@@ -304,54 +311,117 @@ class PinnedAllocationMapTy {
/// Reference to the corresponding device.
GenericDeviceTy &Device;
- /// Find an allocation that intersects with \p Buffer pointer. Assume
- /// the map's mutex is acquired.
- PinnedAllocSetTy::iterator findIntersecting(const void *Buffer) const {
+ /// Indicate whether mapped host buffers should be locked automatically.
+ bool LockMappedBuffers;
+
+ /// Indicate whether failures when locking mapped buffers should be ingored.
+ bool IgnoreLockMappedFailures;
+
+ /// Find an allocation that intersects with \p HstPtr pointer. Assume the
+ /// map's mutex is acquired.
+ const EntryTy *findIntersecting(const void *HstPtr) const {
if (Allocs.empty())
- return Allocs.end();
+ return nullptr;
// Search the first allocation with starting address that is not less than
// the buffer address.
- auto It = Allocs.lower_bound({const_cast<void *>(Buffer)});
+ auto It = Allocs.lower_bound({const_cast<void *>(HstPtr)});
// Direct match of starting addresses.
- if (It != Allocs.end() && It->HstPtr == Buffer)
- return It;
+ if (It != Allocs.end() && It->HstPtr == HstPtr)
+ return &(*It);
// Not direct match but may be a previous pinned allocation in the map which
// contains the buffer. Return false if there is no such a previous
// allocation.
if (It == Allocs.begin())
- return Allocs.end();
+ return nullptr;
// Move to the previous pinned allocation.
--It;
// The buffer is not contained in the pinned allocation.
- if (advanceVoidPtr(It->HstPtr, It->Size) > Buffer)
- return It;
+ if (advanceVoidPtr(It->HstPtr, It->Size) > HstPtr)
+ return &(*It);
// None found.
- return Allocs.end();
+ return nullptr;
+ }
+
+ /// Insert an entry to the map representing a locked buffer. The number of
+ /// references is set to one.
+ Error insertEntry(void *HstPtr, void *DevAccessiblePtr, size_t Size,
+ bool ExternallyLocked = false);
+
+ /// Erase an existing entry from the map.
+ Error eraseEntry(const EntryTy &Entry);
+
+ /// Register a new user into an entry that represents a locked buffer. Check
+ /// also that the registered buffer with \p HstPtr address and \p Size is
+ /// actually contained into the entry.
+ Error registerEntryUse(const EntryTy &Entry, void *HstPtr, size_t Size);
+
+ /// Unregister a user from the entry and return whether it is the last user.
+ /// If it is the last user, the entry will have to be removed from the map
+ /// and unlock the entry's host buffer (if necessary).
+ Expected<bool> unregisterEntryUse(const EntryTy &Entry);
+
+ /// Indicate whether the first range A fully contains the second range B.
+ static bool contains(void *PtrA, size_t SizeA, void *PtrB, size_t SizeB) {
+ void *EndA = advanceVoidPtr(PtrA, SizeA);
+ void *EndB = advanceVoidPtr(PtrB, SizeB);
+ return (PtrB >= PtrA && EndB <= EndA);
+ }
+
+ /// Indicate whether the first range A intersects with the second range B.
+ static bool intersects(void *PtrA, size_t SizeA, void *PtrB, size_t SizeB) {
+ void *EndA = advanceVoidPtr(PtrA, SizeA);
+ void *EndB = advanceVoidPtr(PtrB, SizeB);
+ return (PtrA < EndB && PtrB < EndA);
}
public:
/// Create the map of pinned allocations corresponding to a specific device.
- PinnedAllocationMapTy(GenericDeviceTy &Device) : Device(Device) {}
-
- /// Register a host buffer that was recently locked. None of the already
- /// registered pinned allocations should intersect with this new one. The
- /// registration requires the host pointer in \p HstPtr, the pointer that the
- /// devices should use when transferring data from/to the allocation in
- /// \p DevAccessiblePtr, and the size of the allocation in \p Size. Notice
- /// that some plugins may use the same pointer for the \p HstPtr and
- /// \p DevAccessiblePtr. The allocation must be unregistered using the
+ PinnedAllocationMapTy(GenericDeviceTy &Device) : Device(Device) {
+
+ // Envar that indicates whether mapped host buffers should be locked
+ // automatically. The possible values are boolean (on/off) and a special:
+ // off: Mapped host buffers are not locked.
+ // on: Mapped host buffers are locked in a best-effort approach.
+ // Failure to lock the buffers are silent.
+ // mandatory: Mapped host buffers are always locked and failures to lock
+ // a buffer results in a fatal error.
+ StringEnvar OMPX_LockMappedBuffers("LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS", "off");
+
+ bool Enabled;
+ if (StringParser::parse(OMPX_LockMappedBuffers.get().data(), Enabled)) {
+ // Parsed as a boolean value. Enable the feature if necessary.
+ LockMappedBuffers = Enabled;
+ IgnoreLockMappedFailures = true;
+ } else if (OMPX_LockMappedBuffers.get() == "mandatory") {
+ // Enable the feature and failures are fatal.
+ LockMappedBuffers = true;
+ IgnoreLockMappedFailures = false;
+ } else {
+ // Disable by default.
+ DP("Invalid value LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS=%s\n",
+ OMPX_LockMappedBuffers.get().data());
+ LockMappedBuffers = false;
+ }
+ }
+
+ /// Register a buffer that was recently allocated as a locked host buffer.
+ /// None of the already registered pinned allocations should intersect with
+ /// this new one. The registration requires the host pointer in \p HstPtr,
+ /// the device accessible pointer in \p DevAccessiblePtr, and the size of the
+ /// allocation in \p Size. The allocation must be unregistered using the
/// unregisterHostBuffer function.
Error registerHostBuffer(void *HstPtr, void *DevAccessiblePtr, size_t Size);
/// Unregister a host pinned allocation passing the host pointer which was
/// previously registered using the registerHostBuffer function. When calling
- /// this function, the pinned allocation cannot have any other user.
+ /// this function, the pinned allocation cannot have any other user and will
+ /// not be unlocked by this function.
Error unregisterHostBuffer(void *HstPtr);
/// Lock the host buffer at \p HstPtr or register a new user if it intersects
@@ -365,6 +435,15 @@ class PinnedAllocationMapTy {
/// pinned allocation is removed from the map and the memory is unlocked.
Error unlockHostBuffer(void *HstPtr);
+ /// Lock or register a host buffer that was recently mapped by libomptarget.
+ /// This behavior is applied if LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS is
+ /// enabled. Even if not enabled, externally locked buffers are registered
+ /// in order to optimize their transfers.
+ Error lockMappedHostBuffer(void *HstPtr, size_t Size);
+
+ /// Unlock or unregister a host buffer that was unmapped by libomptarget.
+ Error unlockUnmappedHostBuffer(void *HstPtr);
+
/// Return the device accessible pointer associated to the host pinned
/// allocation which the \p HstPtr belongs, if any. Return null in case the
/// \p HstPtr does not belong to any host pinned allocation. The device
@@ -374,13 +453,12 @@ class PinnedAllocationMapTy {
std::shared_lock<std::shared_mutex> Lock(Mutex);
// Find the intersecting allocation if any.
- auto It = findIntersecting(HstPtr);
- if (It == Allocs.end())
+ const EntryTy *Entry = findIntersecting(HstPtr);
+ if (!Entry)
return nullptr;
- const EntryTy &Entry = *It;
- return advanceVoidPtr(Entry.DevAccessiblePtr,
- getPtrDiff(HstPtr, Entry.HstPtr));
+ return advanceVoidPtr(Entry->DevAccessiblePtr,
+ getPtrDiff(HstPtr, Entry->HstPtr));
}
/// Check whether a buffer belongs to a registered host pinned allocation.
@@ -388,7 +466,7 @@ class PinnedAllocationMapTy {
std::shared_lock<std::shared_mutex> Lock(Mutex);
// Return whether there is an intersecting allocation.
- return (findIntersecting(const_cast<void *>(HstPtr)) != Allocs.end());
+ return (findIntersecting(const_cast<void *>(HstPtr)) != nullptr);
}
};
@@ -457,15 +535,41 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
return PinnedAllocs.lockHostBuffer(HstPtr, Size);
}
- virtual Expected<void *> dataLockImpl(void *HstPtr, int64_t Size) = 0;
-
/// Unpin a host memory buffer that was previously pinned.
Error dataUnlock(void *HstPtr) {
return PinnedAllocs.unlockHostBuffer(HstPtr);
}
+ /// Lock the host buffer \p HstPtr with \p Size bytes with the vendor-specific
+ /// API and return the device accessible pointer.
+ virtual Expected<void *> dataLockImpl(void *HstPtr, int64_t Size) = 0;
+
+ /// Unlock a previously locked host buffer starting at \p HstPtr.
virtual Error dataUnlockImpl(void *HstPtr) = 0;
+ /// Mark the host buffer with address \p HstPtr and \p Size bytes as a mapped
+ /// buffer. This means that libomptarget created a new mapping of that host
+ /// buffer (e.g., because a user OpenMP target map) and the buffer may be used
+ /// as source/destination of memory transfers. We can use this information to
+ /// lock the host buffer and optimize its memory transfers.
+ Error notifyDataMapped(void *HstPtr, int64_t Size) {
+ return PinnedAllocs.lockMappedHostBuffer(HstPtr, Size);
+ }
+
+ /// Mark the host buffer with address \p HstPtr as unmapped. This means that
+ /// libomptarget removed an existing mapping. If the plugin locked the buffer
+ /// in notifyDataMapped, this function should unlock it.
+ Error notifyDataUnmapped(void *HstPtr) {
+ return PinnedAllocs.unlockUnmappedHostBuffer(HstPtr);
+ }
+
+ /// Check whether the host buffer with address \p HstPtr is pinned by the
+ /// underlying vendor-specific runtime (if any). Retrieve the host pointer,
+ /// the device accessible pointer and the size of the original pinned buffer.
+ virtual Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr,
+ void *&BaseDevAccessiblePtr,
+ size_t &BaseSize) const = 0;
+
/// Submit data to the device (host to device transfer).
Error dataSubmit(void *TgtPtr, const void *HstPtr, int64_t Size,
__tgt_async_info *AsyncInfo);
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index 672a44b9919c2..9e38d851196c0 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -500,6 +500,13 @@ struct CUDADeviceTy : public GenericDeviceTy {
Error dataUnlockImpl(void *HstPtr) override { return Plugin::success(); }
+ Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr,
+ void *&BaseDevAccessiblePtr,
+ size_t &BaseSize) const override {
+ // TODO: Implement pinning feature for CUDA.
+ return false;
+ }
+
/// Submit data to the device (host to device transfer).
Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size,
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
diff --git a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
index 7bc18b6bcbb92..a9b828826b1bb 100644
--- a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -224,6 +224,13 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
/// Nothing to do when unlocking the buffer.
Error dataUnlockImpl(void *HstPtr) override { return Plugin::success(); }
+ /// Indicate that the buffer is not pinned.
+ Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr,
+ void *&BaseDevAccessiblePtr,
+ size_t &BaseSize) const override {
+ return false;
+ }
+
/// Submit data to the device (host to device transfer).
Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size,
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index fb74d20cd25c0..d670bad1342bc 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -101,7 +101,8 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
NewEntry.dynRefCountToStr().c_str(), NewEntry.holdRefCountToStr().c_str());
(void)NewEntry;
- return OFFLOAD_SUCCESS;
+ // Notify the plugin about the new mapping.
+ return notifyDataMapped(HstPtrBegin, Size);
}
int DeviceTy::disassociatePtr(void *HstPtrBegin) {
@@ -124,7 +125,9 @@ int DeviceTy::disassociatePtr(void *HstPtrBegin) {
if (Event)
destroyEvent(Event);
HDTTMap->erase(It);
- return OFFLOAD_SUCCESS;
+
+ // Notify the plugin about the unmapped memory.
+ return notifyDataUnmapped(HstPtrBegin);
} else {
REPORT("Trying to disassociate a pointer which was not mapped via "
"omp_target_associate_ptr\n");
@@ -305,6 +308,12 @@ TargetPointerResultTy DeviceTy::getTargetPointer(
Entry->dynRefCountToStr().c_str(), Entry->holdRefCountToStr().c_str(),
(HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
TargetPointer = (void *)Ptr;
+
+ // Notify the plugin about the new mapping.
+ if (notifyDataMapped(HstPtrBegin, Size))
+ return {{false /* IsNewEntry */, false /* IsHostPointer */},
+ nullptr /* Entry */,
+ nullptr /* TargetPointer */};
} else {
// This entry is not present and we did not create a new entry for it.
IsPresent = false;
@@ -485,6 +494,10 @@ int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) {
}
int Ret = deleteData((void *)Entry->TgtPtrBegin);
+
+ // Notify the plugin about the unmapped memory.
+ Ret |= notifyDataUnmapped((void *)Entry->HstPtrBegin);
+
delete Entry;
return Ret;
@@ -591,6 +604,33 @@ int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
DstPtr, Size, AsyncInfo);
}
+int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) {
+ if (!RTL->data_notify_mapped)
+ return OFFLOAD_SUCCESS;
+
+ DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n",
+ DPxPTR(HstPtr), Size);
+
+ if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) {
+ REPORT("Notifiying about data mapping failed.\n");
+ return OFFLOAD_FAIL;
+ }
+ return OFFLOAD_SUCCESS;
+}
+
+int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) {
+ if (!RTL->data_notify_unmapped)
+ return OFFLOAD_SUCCESS;
+
+ DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n", DPxPTR(HstPtr));
+
+ if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) {
+ REPORT("Notifiying about data unmapping failed.\n");
+ return OFFLOAD_FAIL;
+ }
+ return OFFLOAD_SUCCESS;
+}
+
// Run region on device
int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr,
ptrdiff_t *TgtOffsets,
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 194c414863903..b0d10dfb40505 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -212,6 +212,10 @@ static int initLibrary(DeviceTy &Device) {
(uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
false /*UseHoldRefCount*/, CurrHostEntry->name,
true /*IsRefCountINF*/));
+
+ // Notify about the new mapping.
+ if (Device.notifyDataMapped(CurrHostEntry->addr, CurrHostEntry->size))
+ return OFFLOAD_FAIL;
}
}
}
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
index 230a8295c48b4..9c7cc355d0546 100644
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -246,6 +246,10 @@ bool RTLsTy::attemptLoadRTL(const std::string &RTLName, RTLInfoTy &RTL) {
DynLibrary->getAddressOfSymbol("__tgt_rtl_data_lock");
*((void **)&RTL.data_unlock) =
DynLibrary->getAddressOfSymbol("__tgt_rtl_data_unlock");
+ *((void **)&RTL.data_notify_mapped) =
+ DynLibrary->getAddressOfSymbol("__tgt_rtl_data_notify_mapped");
+ *((void **)&RTL.data_notify_unmapped) =
+ DynLibrary->getAddressOfSymbol("__tgt_rtl_data_notify_unmapped");
RTL.LibraryHandler = std::move(DynLibrary);
diff --git a/openmp/libomptarget/test/lit.cfg b/openmp/libomptarget/test/lit.cfg
index 08cd23c411f0f..a10e1e8045884 100644
--- a/openmp/libomptarget/test/lit.cfg
+++ b/openmp/libomptarget/test/lit.cfg
@@ -25,6 +25,9 @@ if 'LIBOMPTARGET_DEBUG' in os.environ:
if 'LIBOMPTARGET_NEXTGEN_PLUGINS' in os.environ:
config.environment['LIBOMPTARGET_NEXTGEN_PLUGINS'] = os.environ['LIBOMPTARGET_NEXTGEN_PLUGINS']
+if 'LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS' in os.environ:
+ config.environment['LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS'] = os.environ['LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS']
+
if 'OMP_TARGET_OFFLOAD' in os.environ:
config.environment['OMP_TARGET_OFFLOAD'] = os.environ['OMP_TARGET_OFFLOAD']
More information about the Openmp-commits
mailing list