[llvm] [Offload] Remove non-blocking allocation type (PR #159851)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 19 14:35:23 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Joseph Huber (jhuber6)
<details>
<summary>Changes</summary>
Summary:
This was originally added in as a hack to work around CUDA's limitation
on allocation. The `libc` implementation now isn't even used for CUDA so
this code is never hit. Even if it were, this code never truly worked.
A true solution would be to use CUDA's virtual memory API instead to
allocate 2MiB slabs independently from the normal memory management
done in the stream.
---
Full diff: https://github.com/llvm/llvm-project/pull/159851.diff
7 Files Affected:
- (modified) offload/include/omptarget.h (-2)
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (-2)
- (modified) offload/plugins-nextgen/common/include/ErrorReporting.h (-1)
- (modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (-2)
- (modified) offload/plugins-nextgen/common/src/RPC.cpp (+2-2)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (-19)
- (modified) offload/plugins-nextgen/host/src/rtl.cpp (-1)
``````````diff
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 8fd722bb15022..794b79e07674e 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -101,8 +101,6 @@ enum TargetAllocTy : int32_t {
TARGET_ALLOC_HOST,
TARGET_ALLOC_SHARED,
TARGET_ALLOC_DEFAULT,
- /// The allocation will not block on other streams.
- TARGET_ALLOC_DEVICE_NON_BLOCKING,
};
inline KernelArgsTy CTorDTorKernelArgs = {
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 277fad29fd24f..64470e9fabf46 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2350,7 +2350,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
switch (Kind) {
case TARGET_ALLOC_DEFAULT:
case TARGET_ALLOC_DEVICE:
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
MemoryPool = CoarseGrainedMemoryPools[0];
break;
case TARGET_ALLOC_HOST:
@@ -3847,7 +3846,6 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
switch (Kind) {
case TARGET_ALLOC_DEFAULT:
case TARGET_ALLOC_DEVICE:
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
MemoryPool = CoarseGrainedMemoryPools[0];
break;
case TARGET_ALLOC_HOST:
diff --git a/offload/plugins-nextgen/common/include/ErrorReporting.h b/offload/plugins-nextgen/common/include/ErrorReporting.h
index 2ad0f2b7dd6c6..68d82cbea0f3c 100644
--- a/offload/plugins-nextgen/common/include/ErrorReporting.h
+++ b/offload/plugins-nextgen/common/include/ErrorReporting.h
@@ -61,7 +61,6 @@ class ErrorReporter {
/// Return a nice name for an TargetAllocTy.
static StringRef getAllocTyName(TargetAllocTy Kind) {
switch (Kind) {
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
case TARGET_ALLOC_DEFAULT:
case TARGET_ALLOC_DEVICE:
return "device memory";
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index b1955b53b80e5..30b5db782370d 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1295,7 +1295,6 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
switch (Kind) {
case TARGET_ALLOC_DEFAULT:
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
case TARGET_ALLOC_DEVICE:
if (MemoryManager) {
Alloc = MemoryManager->allocate(Size, HostPtr);
@@ -1386,7 +1385,6 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
int Res;
switch (Kind) {
case TARGET_ALLOC_DEFAULT:
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
case TARGET_ALLOC_DEVICE:
if (MemoryManager) {
Res = MemoryManager->free(TgtPtr);
diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index 678be78b56af6..bd25a6f1017a8 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -29,14 +29,14 @@ rpc::Status handleOffloadOpcodes(plugin::GenericDeviceTy &Device,
case LIBC_MALLOC: {
Port.recv_and_send([&](rpc::Buffer *Buffer, uint32_t) {
Buffer->data[0] = reinterpret_cast<uintptr_t>(Device.allocate(
- Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE_NON_BLOCKING));
+ Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE));
});
break;
}
case LIBC_FREE: {
Port.recv([&](rpc::Buffer *Buffer, uint32_t) {
Device.free(reinterpret_cast<void *>(Buffer->data[0]),
- TARGET_ALLOC_DEVICE_NON_BLOCKING);
+ TARGET_ALLOC_DEVICE);
});
break;
}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index fdd470fcd8113..b2f840113cff3 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -587,16 +587,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
Res = cuMemAllocManaged(&DevicePtr, Size, CU_MEM_ATTACH_GLOBAL);
MemAlloc = (void *)DevicePtr;
break;
- case TARGET_ALLOC_DEVICE_NON_BLOCKING: {
- CUstream Stream;
- if ((Res = cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING)))
- break;
- if ((Res = cuMemAllocAsync(&DevicePtr, Size, Stream)))
- break;
- cuStreamSynchronize(Stream);
- Res = cuStreamDestroy(Stream);
- MemAlloc = (void *)DevicePtr;
- }
}
if (auto Err =
@@ -627,15 +617,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
case TARGET_ALLOC_HOST:
Res = cuMemFreeHost(TgtPtr);
break;
- case TARGET_ALLOC_DEVICE_NON_BLOCKING: {
- CUstream Stream;
- if ((Res = cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING)))
- break;
- cuMemFreeAsync(reinterpret_cast<CUdeviceptr>(TgtPtr), Stream);
- cuStreamSynchronize(Stream);
- if ((Res = cuStreamDestroy(Stream)))
- break;
- }
}
if (auto Err = Plugin::check(Res, "error in cuMemFree[Host]: %s")) {
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 0db01ca09ab02..44e2584fe53cc 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -250,7 +250,6 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
case TARGET_ALLOC_DEVICE:
case TARGET_ALLOC_HOST:
case TARGET_ALLOC_SHARED:
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
MemAlloc = std::malloc(Size);
break;
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/159851
More information about the llvm-commits
mailing list