[Openmp-commits] [openmp] 5216a9b - [Libomptarget] Enable the shared allocator for AMDGPU
Joseph Huber via Openmp-commits
openmp-commits at lists.llvm.org
Mon Feb 20 06:44:23 PST 2023
Author: Joseph Huber
Date: 2023-02-20T08:44:08-06:00
New Revision: 5216a9bfb002705fcaee7a078cbbb538e806eea1
URL: https://github.com/llvm/llvm-project/commit/5216a9bfb002705fcaee7a078cbbb538e806eea1
DIFF: https://github.com/llvm/llvm-project/commit/5216a9bfb002705fcaee7a078cbbb538e806eea1.diff
LOG: [Libomptarget] Enable the shared allocator for AMDGPU
Currently, the AMDGPU plugin does not support the `TARGET_ALLOC_SHARED`
allocation kind. We used the fine-grained memory allocator for the
"host" allocation, which is most likely not what was intended. Fine-grained
memory can be accessed by all agents, so it should be considered shared.
This patch removes the use of fine-grained memory for the host
allocator. A later patch will add support for this via the
`hsa_amd_memory_lock` method.
Reviewed By: kevinsala
Differential Revision: https://reviews.llvm.org/D143771
Added:
Modified:
openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
openmp/libomptarget/test/api/omp_device_managed_memory.c
openmp/libomptarget/test/api/omp_host_pinned_memory.c
openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 983265f6ef96f..5bb62f8924726 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -50,7 +50,7 @@
#endif
#else
#include "hsa/hsa.h"
-#include "hsa_ext_amd.h"
+#include "hsa/hsa_ext_amd.h"
#endif
namespace llvm {
@@ -1438,7 +1438,7 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
if (auto Err = ArgsMemoryManager.init(getArgsMemoryPool()))
return Err;
- if (auto Err = PinnedMemoryManager.init(getHostMemoryPool()))
+ if (auto Err = PinnedMemoryManager.init(getFineGrainedMemoryPool()))
return Err;
return Plugin::success();
@@ -1478,8 +1478,8 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
/// Get one of the host agents. Return always the first agent.
hsa_agent_t getAgent() const override { return Agents[0]; }
- /// Get a memory pool for host pinned allocations.
- AMDGPUMemoryPoolTy &getHostMemoryPool() {
+ /// Get a memory pool for fine-grained allocations.
+ AMDGPUMemoryPoolTy &getFineGrainedMemoryPool() {
assert(!FineGrainedMemoryPools.empty() && "No fine-grained mempool");
// Retrive any memory pool.
return *FineGrainedMemoryPools[0];
@@ -1762,12 +1762,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
MemoryPool = CoarseGrainedMemoryPools[0];
break;
case TARGET_ALLOC_HOST:
- MemoryPool = &HostDevice.getHostMemoryPool();
break;
case TARGET_ALLOC_SHARED:
- // TODO: Not supported yet. We could look at fine-grained host memory
- // pools that are accessible by this device. The allocation should be made
- // explicitly accessible if it is not yet.
+ MemoryPool = &HostDevice.getFineGrainedMemoryPool();
break;
}
@@ -2626,12 +2623,9 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
MemoryPool = CoarseGrainedMemoryPools[0];
break;
case TARGET_ALLOC_HOST:
- MemoryPool = &HostDevice.getHostMemoryPool();
break;
case TARGET_ALLOC_SHARED:
- // TODO: Not supported yet. We could look at fine-grained host memory
- // pools that are accessible by this device. The allocation should be made
- // explicitly accessible if it is not yet.
+ MemoryPool = &HostDevice.getFineGrainedMemoryPool();
break;
}
@@ -2647,10 +2641,10 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
return nullptr;
}
- if (Kind == TARGET_ALLOC_HOST && Alloc) {
+ if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED)) {
auto &KernelAgents = Plugin::get<AMDGPUPluginTy>().getKernelAgents();
- // Enable all kernel agents to access the host pinned buffer.
+ // Enable all kernel agents to access the buffer.
if (auto Err = MemoryPool->enableAccess(Alloc, Size, KernelAgents)) {
REPORT("%s\n", toString(std::move(Err)).data());
return nullptr;
diff --git a/openmp/libomptarget/test/api/omp_device_managed_memory.c b/openmp/libomptarget/test/api/omp_device_managed_memory.c
index 80b3559466100..2a9fe09a8334c 100644
--- a/openmp/libomptarget/test/api/omp_device_managed_memory.c
+++ b/openmp/libomptarget/test/api/omp_device_managed_memory.c
@@ -1,5 +1,4 @@
-// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
-// REQUIRES: nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-generic
#include <omp.h>
#include <stdio.h>
diff --git a/openmp/libomptarget/test/api/omp_host_pinned_memory.c b/openmp/libomptarget/test/api/omp_host_pinned_memory.c
index 7a6a00d489d5a..d7238058481a0 100644
--- a/openmp/libomptarget/test/api/omp_host_pinned_memory.c
+++ b/openmp/libomptarget/test/api/omp_host_pinned_memory.c
@@ -1,4 +1,5 @@
// RUN: %libomptarget-compile-run-and-check-generic
+// UNSUPPORTED: amdgcn-amd-amdhsa
#include <omp.h>
#include <stdio.h>
diff --git a/openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c b/openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c
index 0b8118fa219af..03dc92af1794e 100644
--- a/openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c
+++ b/openmp/libomptarget/test/api/omp_host_pinned_memory_alloc.c
@@ -1,4 +1,5 @@
// RUN: %libomptarget-compile-run-and-check-generic
+// UNSUPPORTED: amdgcn-amd-amdhsa
#include <omp.h>
#include <stdio.h>
More information about the Openmp-commits
mailing list