[Openmp-commits] [openmp] e5354a2 - [OpenMP][libomptarget] Centralize host pinned buffers map to NextGen's PluginInterface
Kevin Sala via Openmp-commits
openmp-commits at lists.llvm.org
Wed Dec 21 17:12:07 PST 2022
Author: Kevin Sala
Date: 2022-12-22T02:11:05+01:00
New Revision: e5354a2bfa924cbcc72fd503efd619037e86ac9c
URL: https://github.com/llvm/llvm-project/commit/e5354a2bfa924cbcc72fd503efd619037e86ac9c
DIFF: https://github.com/llvm/llvm-project/commit/e5354a2bfa924cbcc72fd503efd619037e86ac9c.diff
LOG: [OpenMP][libomptarget] Centralize host pinned buffers map to NextGen's PluginInterface
This patch moves the management/tracking of host pinned buffers to the common PluginInterface
in NextGen plugins. For the moment, the management consists of tracking the host pinned
allocations into a map in each device.
Differential Revision: https://reviews.llvm.org/D140502
Added:
Modified:
openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 73e3bd31406ba..2d1ca52aad227 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -17,7 +17,6 @@
#include <hsa.h>
#include <hsa_ext_amd.h>
#include <mutex>
-#include <shared_mutex>
#include <string>
#include <unistd.h>
#include <unordered_map>
@@ -1721,15 +1720,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return OFFLOAD_FAIL;
}
- if (Kind == TARGET_ALLOC_HOST) {
- std::lock_guard<std::shared_mutex> Lock(HostAllocationsMutex);
- size_t Erased = HostAllocations.erase(TgtPtr);
- if (!Erased) {
- REPORT("Cannot find a host allocation in the map\n");
- return OFFLOAD_FAIL;
- }
- }
-
return OFFLOAD_SUCCESS;
}
@@ -1779,7 +1769,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
// Use one-step asynchronous operation when host memory is already pinned.
- if (isHostPinnedMemory(HstPtr)) {
+ if (isHostPinnedMemoryBuffer(HstPtr)) {
AMDGPUStreamTy &Stream = getStream(AsyncInfoWrapper);
return Stream.pushPinnedMemoryCopyAsync(TgtPtr, HstPtr, Size);
}
@@ -1833,7 +1823,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// Retrieve data from the device (device to host transfer).
Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size,
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
- if (isHostPinnedMemory(HstPtr)) {
+
+ // Use one-step asynchronous operation when host memory is already pinned.
+ if (isHostPinnedMemoryBuffer(HstPtr)) {
// Use one-step asynchronous operation when host memory is already pinned.
AMDGPUStreamTy &Stream = getStream(AsyncInfoWrapper);
return Stream.pushPinnedMemoryCopyAsync(HstPtr, TgtPtr, Size);
@@ -2005,23 +1997,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Queues[Current % Queues.size()];
}
- /// Check whether a buffer is a host pinned buffer.
- bool isHostPinnedMemory(const void *Ptr) const {
- bool Found = false;
- HostAllocationsMutex.lock_shared();
- if (!HostAllocations.empty()) {
- auto It = HostAllocations.lower_bound((const void *)Ptr);
- if (It != HostAllocations.end() && It->first == Ptr) {
- Found = true;
- } else if (It != HostAllocations.begin()) {
- --It;
- Found = ((const char *)It->first + It->second > (const char *)Ptr);
- }
- }
- HostAllocationsMutex.unlock_shared();
- return Found;
- }
-
private:
using AMDGPUStreamRef = AMDGPUResourceRef<AMDGPUStreamTy>;
using AMDGPUEventRef = AMDGPUResourceRef<AMDGPUEventTy>;
@@ -2073,12 +2048,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// List of device packet queues.
std::vector<AMDGPUQueueTy> Queues;
-
- /// Map of host pinned allocations. We track these pinned allocations so that
- /// memory transfers involving these allocations do not need a two-step copy
- /// with an intermediate pinned buffer.
- std::map<const void *, size_t> HostAllocations;
- mutable std::shared_mutex HostAllocationsMutex;
};
Error AMDGPUDeviceImageTy::loadExecutable(const AMDGPUDeviceTy &Device) {
@@ -2537,10 +2506,6 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
REPORT("%s\n", toString(std::move(Err)).data());
return nullptr;
}
-
- // Keep track of the host pinned allocations for optimizations in transfers.
- std::lock_guard<std::shared_mutex> Lock(HostAllocationsMutex);
- HostAllocations.insert({Alloc, Size});
}
return Alloc;
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index 6ce6c29481c55..9f52562dc9571 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -356,6 +356,27 @@ Error GenericDeviceTy::registerKernelOffloadEntry(
return Plugin::success();
}
+Error GenericDeviceTy::registerHostPinnedMemoryBuffer(const void *Buffer,
+ size_t Size) {
+ std::lock_guard<std::shared_mutex> Lock(HostAllocationsMutex);
+
+ auto Res = HostAllocations.insert({Buffer, Size});
+ if (!Res.second)
+ return Plugin::error("Registering an already registered pinned buffer");
+
+ return Plugin::success();
+}
+
+Error GenericDeviceTy::unregisterHostPinnedMemoryBuffer(const void *Buffer) {
+ std::lock_guard<std::shared_mutex> Lock(HostAllocationsMutex);
+
+ size_t Erased = HostAllocations.erase(Buffer);
+ if (!Erased)
+ return Plugin::error("Cannot find a registered host pinned buffer");
+
+ return Plugin::success();
+}
+
Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) {
if (!AsyncInfo || !AsyncInfo->Queue)
return Plugin::error("Invalid async info queue");
@@ -391,14 +412,18 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
return Plugin::error("Failed to allocate from device allocator");
}
- // Sucessful and valid allocation.
- if (Alloc)
- return Alloc;
+ // Report error if the memory manager or the device allocator did not return
+ // any memory buffer.
+ if (!Alloc)
+ return Plugin::error("Invalid target data allocation kind or requested "
+ "allocator not implemented yet");
- // At this point means that we did not tried to allocate from the memory
- // manager nor the device allocator.
- return Plugin::error("Invalid target data allocation kind or requested "
- "allocator not implemented yet");
+ // Register allocated buffer as pinned memory if the type is host memory.
+ if (Kind == TARGET_ALLOC_HOST)
+ if (auto Err = registerHostPinnedMemoryBuffer(Alloc, Size))
+ return Err;
+
+ return Alloc;
}
Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
@@ -411,6 +436,11 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
if (Res)
return Plugin::error("Failure to deallocate device pointer %p", TgtPtr);
+ // Unregister deallocated pinned memory buffer if the type is host memory.
+ if (Kind == TARGET_ALLOC_HOST)
+ if (auto Err = unregisterHostPinnedMemoryBuffer(TgtPtr))
+ return std::move(Err);
+
return Plugin::success();
}
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
index 9b89e316551db..45b0fb0637576 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -15,6 +15,7 @@
#include <cstdint>
#include <list>
#include <map>
+#include <shared_mutex>
#include <vector>
#include "Debug.h"
@@ -405,6 +406,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// setupDeviceEnvironment() function.
virtual bool shouldSetupDeviceEnvironment() const { return true; }
+ /// Register a host buffer as host pinned allocation.
+ Error registerHostPinnedMemoryBuffer(const void *Buffer, size_t Size);
+
+ /// Unregister a host pinned allocations.
+ Error unregisterHostPinnedMemoryBuffer(const void *Buffer);
+
/// Pointer to the memory manager or nullptr if not available.
MemoryManagerTy *MemoryManager;
@@ -419,7 +426,40 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
UInt64Envar OMPX_TargetStackSize;
UInt64Envar OMPX_TargetHeapSize;
+ /// Map of host pinned allocations. We track these pinned allocations so that
+ /// memory transfers involving these allocations can be optimized.
+ std::map<const void *, size_t> HostAllocations;
+ mutable std::shared_mutex HostAllocationsMutex;
+
protected:
+ /// Check whether a buffer has been registered as host pinned memory.
+ bool isHostPinnedMemoryBuffer(const void *Buffer) const {
+ std::shared_lock<std::shared_mutex> Lock(HostAllocationsMutex);
+
+ if (HostAllocations.empty())
+ return false;
+
+ // Search the first allocation with starting address that is not less than
+ // the buffer address.
+ auto It = HostAllocations.lower_bound(Buffer);
+
+ // Direct match of starting addresses.
+ if (It != HostAllocations.end() && It->first == Buffer)
+ return true;
+
+ // Not direct match but may be a previous pinned allocation in the map which
+ // contains the buffer. Return false if there is no such a previous
+ // allocation.
+ if (It == HostAllocations.begin())
+ return false;
+
+ // Move to the previous pinned allocation.
+ --It;
+
+ // Evaluate whether the buffer is contained in the pinned allocation.
+ return ((const char *)It->first + It->second > (const char *)Buffer);
+ }
+
/// Environment variables defined by the LLVM OpenMP implementation
/// regarding the initial number of streams and events.
UInt32Envar OMPX_InitialNumStreams;
More information about the Openmp-commits
mailing list