[Openmp-commits] [openmp] 5216a9b - [Libomptarget] Enable the shared allocator for AMDGPU

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Mon Feb 20 06:44:23 PST 2023


Author: Joseph Huber
Date: 2023-02-20T08:44:08-06:00
New Revision: 5216a9bfb002705fcaee7a078cbbb538e806eea1

URL: https://github.com/llvm/llvm-project/commit/5216a9bfb002705fcaee7a078cbbb538e806eea1
DIFF: https://github.com/llvm/llvm-project/commit/5216a9bfb002705fcaee7a078cbbb538e806eea1.diff

LOG: [Libomptarget] Enable the shared allocator for AMDGPU

Previously, the AMDGPU plugin did not support the `TARGET_ALLOC_SHARED`
allocation kind. Instead, we used the fine-grained memory allocator for the
"host" allocation kind, which is most likely not what is intended.
Fine-grained memory can be accessed by all agents, so it should be
considered shared. This patch uses fine-grained memory to implement the
shared allocator and removes its use for the host allocator. Support for
the host allocator will be added in a later patch via the
`hsa_amd_memory_lock` method.

Reviewed By: kevinsala

Differential Revision: https://reviews.llvm.org/D143771
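
As a rough illustration (not part of this patch), the shared allocation kind
that this change enables on AMDGPU can be requested from user code through
LLVM's predefined `llvm_omp_target_shared_mem_alloc` allocator, much like the
omp_device_managed_memory.c test updated below. A minimal sketch, assuming an
LLVM-based OpenMP offloading toolchain:

#include <omp.h>
#include <stdio.h>

int main() {
  const int N = 64;
  // Request memory accessible from both the host and the device
  // (TARGET_ALLOC_SHARED in the plugin).
  int *Ptr = omp_alloc(N * sizeof(int), llvm_omp_target_shared_mem_alloc);

  // Write the buffer on the device...
#pragma omp target is_device_ptr(Ptr)
  for (int i = 0; i < N; ++i)
    Ptr[i] = i;

  // ...and read it back on the host without an explicit copy.
  int Sum = 0;
  for (int i = 0; i < N; ++i)
    Sum += Ptr[i];
  printf("Sum = %d\n", Sum);

  omp_free(Ptr, llvm_omp_target_shared_mem_alloc);
  return 0;
}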

Added: 
    

Modified: 
    openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
    openmp/libomptarget/test/api/omp_device_managed_memory.c
    openmp/libomptarget/test/api/omp_host_pinned_memory.c
    openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c

Removed: 
    


################################################################################
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 983265f6ef96f..5bb62f8924726 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -50,7 +50,7 @@
 #endif
 #else
 #include "hsa/hsa.h"
-#include "hsa_ext_amd.h"
+#include "hsa/hsa_ext_amd.h"
 #endif
 
 namespace llvm {
@@ -1438,7 +1438,7 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
     if (auto Err = ArgsMemoryManager.init(getArgsMemoryPool()))
       return Err;
 
-    if (auto Err = PinnedMemoryManager.init(getHostMemoryPool()))
+    if (auto Err = PinnedMemoryManager.init(getFineGrainedMemoryPool()))
       return Err;
 
     return Plugin::success();
@@ -1478,8 +1478,8 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
   /// Get one of the host agents. Return always the first agent.
   hsa_agent_t getAgent() const override { return Agents[0]; }
 
-  /// Get a memory pool for host pinned allocations.
-  AMDGPUMemoryPoolTy &getHostMemoryPool() {
+  /// Get a memory pool for fine-grained allocations.
+  AMDGPUMemoryPoolTy &getFineGrainedMemoryPool() {
     assert(!FineGrainedMemoryPools.empty() && "No fine-grained mempool");
     // Retrive any memory pool.
     return *FineGrainedMemoryPools[0];
@@ -1762,12 +1762,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
       MemoryPool = CoarseGrainedMemoryPools[0];
       break;
     case TARGET_ALLOC_HOST:
-      MemoryPool = &HostDevice.getHostMemoryPool();
       break;
     case TARGET_ALLOC_SHARED:
-      // TODO: Not supported yet. We could look at fine-grained host memory
-      // pools that are accessible by this device. The allocation should be made
-      // explicitly accessible if it is not yet.
+      MemoryPool = &HostDevice.getFineGrainedMemoryPool();
       break;
     }
 
@@ -2626,12 +2623,9 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
     MemoryPool = CoarseGrainedMemoryPools[0];
     break;
   case TARGET_ALLOC_HOST:
-    MemoryPool = &HostDevice.getHostMemoryPool();
     break;
   case TARGET_ALLOC_SHARED:
-    // TODO: Not supported yet. We could look at fine-grained host memory
-    // pools that are accessible by this device. The allocation should be made
-    // explicitly accessible if it is not yet.
+    MemoryPool = &HostDevice.getFineGrainedMemoryPool();
     break;
   }
 
@@ -2647,10 +2641,10 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
     return nullptr;
   }
 
-  if (Kind == TARGET_ALLOC_HOST && Alloc) {
+  if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED)) {
     auto &KernelAgents = Plugin::get<AMDGPUPluginTy>().getKernelAgents();
 
-    // Enable all kernel agents to access the host pinned buffer.
+    // Enable all kernel agents to access the buffer.
     if (auto Err = MemoryPool->enableAccess(Alloc, Size, KernelAgents)) {
       REPORT("%s\n", toString(std::move(Err)).data());
       return nullptr;

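For context (again, not part of the diff), the `enableAccess` call above
corresponds roughly to the HSA runtime primitives in this hedged sketch; the
real plugin discovers its pools and agents separately and carries fuller error
handling. `FineGrainedPool` and `KernelAgents` are assumed to have been
obtained via the usual `hsa_amd_agent_iterate_memory_pools` /
`hsa_iterate_agents` queries:

#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include <stddef.h>

// Allocate from a fine-grained pool and make the buffer visible to a set of
// kernel (GPU) agents, similar in spirit to the allocate() + enableAccess()
// path in rtl.cpp above.
static void *allocSharedSketch(hsa_amd_memory_pool_t FineGrainedPool,
                               const hsa_agent_t *KernelAgents,
                               uint32_t NumAgents, size_t Size) {
  void *Ptr = NULL;
  if (hsa_amd_memory_pool_allocate(FineGrainedPool, Size, /*flags=*/0, &Ptr) !=
      HSA_STATUS_SUCCESS)
    return NULL;

  // Fine-grained memory is already host-accessible; explicitly allow every
  // kernel agent to access it as well so the buffer behaves as shared memory.
  if (hsa_amd_agents_allow_access(NumAgents, KernelAgents, /*flags=*/NULL,
                                  Ptr) != HSA_STATUS_SUCCESS) {
    hsa_amd_memory_pool_free(Ptr);
    return NULL;
  }
  return Ptr;
}
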
diff --git a/openmp/libomptarget/test/api/omp_device_managed_memory.c b/openmp/libomptarget/test/api/omp_device_managed_memory.c
index 80b3559466100..2a9fe09a8334c 100644
--- a/openmp/libomptarget/test/api/omp_device_managed_memory.c
+++ b/openmp/libomptarget/test/api/omp_device_managed_memory.c
@@ -1,5 +1,4 @@
-// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
-// REQUIRES: nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-generic
 
 #include <omp.h>
 #include <stdio.h>

diff --git a/openmp/libomptarget/test/api/omp_host_pinned_memory.c b/openmp/libomptarget/test/api/omp_host_pinned_memory.c
index 7a6a00d489d5a..d7238058481a0 100644
--- a/openmp/libomptarget/test/api/omp_host_pinned_memory.c
+++ b/openmp/libomptarget/test/api/omp_host_pinned_memory.c
@@ -1,4 +1,5 @@
 // RUN: %libomptarget-compile-run-and-check-generic
+// UNSUPPORTED: amdgcn-amd-amdhsa
 
 #include <omp.h>
 #include <stdio.h>

diff --git a/openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c b/openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c
index 0b8118fa219af..03dc92af1794e 100644
--- a/openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c
+++ b/openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c
@@ -1,4 +1,5 @@
 // RUN: %libomptarget-compile-run-and-check-generic
+// UNSUPPORTED: amdgcn-amd-amdhsa
 
 #include <omp.h>
 #include <stdio.h>

