[Openmp-commits] [openmp] e5354a2 - [OpenMP][libomptarget] Centralize host pinned buffers map to NextGen's PluginInterface

Kevin Sala via Openmp-commits openmp-commits at lists.llvm.org
Wed Dec 21 17:12:07 PST 2022


Author: Kevin Sala
Date: 2022-12-22T02:11:05+01:00
New Revision: e5354a2bfa924cbcc72fd503efd619037e86ac9c

URL: https://github.com/llvm/llvm-project/commit/e5354a2bfa924cbcc72fd503efd619037e86ac9c
DIFF: https://github.com/llvm/llvm-project/commit/e5354a2bfa924cbcc72fd503efd619037e86ac9c.diff

LOG: [OpenMP][libomptarget] Centralize host pinned buffers map to NextGen's PluginInterface

This patch moves the management/tracking of host pinned buffers to the common PluginInterface
in NextGen plugins. For the moment, the management consists of tracking the host pinned
allocations into a map in each device.

Differential Revision: https://reviews.llvm.org/D140502

Added: 
    

Modified: 
    openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
    openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
    openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 73e3bd31406ba..2d1ca52aad227 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -17,7 +17,6 @@
 #include <hsa.h>
 #include <hsa_ext_amd.h>
 #include <mutex>
-#include <shared_mutex>
 #include <string>
 #include <unistd.h>
 #include <unordered_map>
@@ -1721,15 +1720,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
       return OFFLOAD_FAIL;
     }
 
-    if (Kind == TARGET_ALLOC_HOST) {
-      std::lock_guard<std::shared_mutex> Lock(HostAllocationsMutex);
-      size_t Erased = HostAllocations.erase(TgtPtr);
-      if (!Erased) {
-        REPORT("Cannot find a host allocation in the map\n");
-        return OFFLOAD_FAIL;
-      }
-    }
-
     return OFFLOAD_SUCCESS;
   }
 
@@ -1779,7 +1769,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
                        AsyncInfoWrapperTy &AsyncInfoWrapper) override {
 
     // Use one-step asynchronous operation when host memory is already pinned.
-    if (isHostPinnedMemory(HstPtr)) {
+    if (isHostPinnedMemoryBuffer(HstPtr)) {
       AMDGPUStreamTy &Stream = getStream(AsyncInfoWrapper);
       return Stream.pushPinnedMemoryCopyAsync(TgtPtr, HstPtr, Size);
     }
@@ -1833,7 +1823,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
   /// Retrieve data from the device (device to host transfer).
   Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size,
                          AsyncInfoWrapperTy &AsyncInfoWrapper) override {
-    if (isHostPinnedMemory(HstPtr)) {
+
+    // Use one-step asynchronous operation when host memory is already pinned.
+    if (isHostPinnedMemoryBuffer(HstPtr)) {
       // Use one-step asynchronous operation when host memory is already pinned.
       AMDGPUStreamTy &Stream = getStream(AsyncInfoWrapper);
       return Stream.pushPinnedMemoryCopyAsync(HstPtr, TgtPtr, Size);
@@ -2005,23 +1997,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     return Queues[Current % Queues.size()];
   }
 
-  /// Check whether a buffer is a host pinned buffer.
-  bool isHostPinnedMemory(const void *Ptr) const {
-    bool Found = false;
-    HostAllocationsMutex.lock_shared();
-    if (!HostAllocations.empty()) {
-      auto It = HostAllocations.lower_bound((const void *)Ptr);
-      if (It != HostAllocations.end() && It->first == Ptr) {
-        Found = true;
-      } else if (It != HostAllocations.begin()) {
-        --It;
-        Found = ((const char *)It->first + It->second > (const char *)Ptr);
-      }
-    }
-    HostAllocationsMutex.unlock_shared();
-    return Found;
-  }
-
 private:
   using AMDGPUStreamRef = AMDGPUResourceRef<AMDGPUStreamTy>;
   using AMDGPUEventRef = AMDGPUResourceRef<AMDGPUEventTy>;
@@ -2073,12 +2048,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
 
   /// List of device packet queues.
   std::vector<AMDGPUQueueTy> Queues;
-
-  /// Map of host pinned allocations. We track these pinned allocations so that
-  /// memory transfers involving these allocations do not need a two-step copy
-  /// with an intermediate pinned buffer.
-  std::map<const void *, size_t> HostAllocations;
-  mutable std::shared_mutex HostAllocationsMutex;
 };
 
 Error AMDGPUDeviceImageTy::loadExecutable(const AMDGPUDeviceTy &Device) {
@@ -2537,10 +2506,6 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
       REPORT("%s\n", toString(std::move(Err)).data());
       return nullptr;
     }
-
-    // Keep track of the host pinned allocations for optimizations in transfers.
-    std::lock_guard<std::shared_mutex> Lock(HostAllocationsMutex);
-    HostAllocations.insert({Alloc, Size});
   }
 
   return Alloc;

diff  --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index 6ce6c29481c55..9f52562dc9571 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -356,6 +356,27 @@ Error GenericDeviceTy::registerKernelOffloadEntry(
   return Plugin::success();
 }
 
+Error GenericDeviceTy::registerHostPinnedMemoryBuffer(const void *Buffer,
+                                                      size_t Size) {
+  std::lock_guard<std::shared_mutex> Lock(HostAllocationsMutex);
+
+  auto Res = HostAllocations.insert({Buffer, Size});
+  if (!Res.second)
+    return Plugin::error("Registering an already registered pinned buffer");
+
+  return Plugin::success();
+}
+
+Error GenericDeviceTy::unregisterHostPinnedMemoryBuffer(const void *Buffer) {
+  std::lock_guard<std::shared_mutex> Lock(HostAllocationsMutex);
+
+  size_t Erased = HostAllocations.erase(Buffer);
+  if (!Erased)
+    return Plugin::error("Cannot find a registered host pinned buffer");
+
+  return Plugin::success();
+}
+
 Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) {
   if (!AsyncInfo || !AsyncInfo->Queue)
     return Plugin::error("Invalid async info queue");
@@ -391,14 +412,18 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
       return Plugin::error("Failed to allocate from device allocator");
   }
 
-  // Sucessful and valid allocation.
-  if (Alloc)
-    return Alloc;
+  // Report error if the memory manager or the device allocator did not return
+  // any memory buffer.
+  if (!Alloc)
+    return Plugin::error("Invalid target data allocation kind or requested "
+                         "allocator not implemented yet");
 
-  // At this point means that we did not tried to allocate from the memory
-  // manager nor the device allocator.
-  return Plugin::error("Invalid target data allocation kind or requested "
-                       "allocator not implemented yet");
+  // Register allocated buffer as pinned memory if the type is host memory.
+  if (Kind == TARGET_ALLOC_HOST)
+    if (auto Err = registerHostPinnedMemoryBuffer(Alloc, Size))
+      return Err;
+
+  return Alloc;
 }
 
 Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
@@ -411,6 +436,11 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
   if (Res)
     return Plugin::error("Failure to deallocate device pointer %p", TgtPtr);
 
+  // Unregister deallocated pinned memory buffer if the type is host memory.
+  if (Kind == TARGET_ALLOC_HOST)
+    if (auto Err = unregisterHostPinnedMemoryBuffer(TgtPtr))
+      return std::move(Err);
+
   return Plugin::success();
 }
 

diff  --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
index 9b89e316551db..45b0fb0637576 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -15,6 +15,7 @@
 #include <cstdint>
 #include <list>
 #include <map>
+#include <shared_mutex>
 #include <vector>
 
 #include "Debug.h"
@@ -405,6 +406,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   /// setupDeviceEnvironment() function.
   virtual bool shouldSetupDeviceEnvironment() const { return true; }
 
+  /// Register a host buffer as host pinned allocation.
+  Error registerHostPinnedMemoryBuffer(const void *Buffer, size_t Size);
+
+  /// Unregister a host pinned allocations.
+  Error unregisterHostPinnedMemoryBuffer(const void *Buffer);
+
   /// Pointer to the memory manager or nullptr if not available.
   MemoryManagerTy *MemoryManager;
 
@@ -419,7 +426,40 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   UInt64Envar OMPX_TargetStackSize;
   UInt64Envar OMPX_TargetHeapSize;
 
+  /// Map of host pinned allocations. We track these pinned allocations so that
+  /// memory transfers involving these allocations can be optimized.
+  std::map<const void *, size_t> HostAllocations;
+  mutable std::shared_mutex HostAllocationsMutex;
+
 protected:
+  /// Check whether a buffer has been registered as host pinned memory.
+  bool isHostPinnedMemoryBuffer(const void *Buffer) const {
+    std::shared_lock<std::shared_mutex> Lock(HostAllocationsMutex);
+
+    if (HostAllocations.empty())
+      return false;
+
+    // Search the first allocation with starting address that is not less than
+    // the buffer address.
+    auto It = HostAllocations.lower_bound(Buffer);
+
+    // Direct match of starting addresses.
+    if (It != HostAllocations.end() && It->first == Buffer)
+      return true;
+
+    // Not direct match but may be a previous pinned allocation in the map which
+    // contains the buffer. Return false if there is no such a previous
+    // allocation.
+    if (It == HostAllocations.begin())
+      return false;
+
+    // Move to the previous pinned allocation.
+    --It;
+
+    // Evaluate whether the buffer is contained in the pinned allocation.
+    return ((const char *)It->first + It->second > (const char *)Buffer);
+  }
+
   /// Environment variables defined by the LLVM OpenMP implementation
   /// regarding the initial number of streams and events.
   UInt32Envar OMPX_InitialNumStreams;


        


More information about the Openmp-commits mailing list