[llvm] [Offload] Remove non-blocking allocation type (PR #159851)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 19 14:40:13 PDT 2025
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/159851
>From 38446b01b0dc87a506205d752edea7b928082832 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 19 Sep 2025 15:49:18 -0500
Subject: [PATCH] [Offload] Remove non-blocking allocation type
Summary:
This was originally added in as a hack to work around CUDA's limitation
on allocation. The `libc` implementation now isn't even used for CUDA so
this code is never hit. Even in this case, this code never truly worked.
A true solution would be to use CUDA's virtual memory API instead to
allocate 2MiB slabs independently from the normal memory management
done in the stream.
---
offload/include/omptarget.h | 2 --
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 2 --
.../common/include/ErrorReporting.h | 1 -
.../common/src/PluginInterface.cpp | 2 --
offload/plugins-nextgen/common/src/RPC.cpp | 6 +++---
offload/plugins-nextgen/cuda/src/rtl.cpp | 19 -------------------
offload/plugins-nextgen/host/src/rtl.cpp | 1 -
7 files changed, 3 insertions(+), 30 deletions(-)
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 8fd722bb15022..794b79e07674e 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -101,8 +101,6 @@ enum TargetAllocTy : int32_t {
TARGET_ALLOC_HOST,
TARGET_ALLOC_SHARED,
TARGET_ALLOC_DEFAULT,
- /// The allocation will not block on other streams.
- TARGET_ALLOC_DEVICE_NON_BLOCKING,
};
inline KernelArgsTy CTorDTorKernelArgs = {
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 277fad29fd24f..64470e9fabf46 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2350,7 +2350,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
switch (Kind) {
case TARGET_ALLOC_DEFAULT:
case TARGET_ALLOC_DEVICE:
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
MemoryPool = CoarseGrainedMemoryPools[0];
break;
case TARGET_ALLOC_HOST:
@@ -3847,7 +3846,6 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
switch (Kind) {
case TARGET_ALLOC_DEFAULT:
case TARGET_ALLOC_DEVICE:
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
MemoryPool = CoarseGrainedMemoryPools[0];
break;
case TARGET_ALLOC_HOST:
diff --git a/offload/plugins-nextgen/common/include/ErrorReporting.h b/offload/plugins-nextgen/common/include/ErrorReporting.h
index 2ad0f2b7dd6c6..68d82cbea0f3c 100644
--- a/offload/plugins-nextgen/common/include/ErrorReporting.h
+++ b/offload/plugins-nextgen/common/include/ErrorReporting.h
@@ -61,7 +61,6 @@ class ErrorReporter {
/// Return a nice name for an TargetAllocTy.
static StringRef getAllocTyName(TargetAllocTy Kind) {
switch (Kind) {
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
case TARGET_ALLOC_DEFAULT:
case TARGET_ALLOC_DEVICE:
return "device memory";
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index b1955b53b80e5..30b5db782370d 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1295,7 +1295,6 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
switch (Kind) {
case TARGET_ALLOC_DEFAULT:
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
case TARGET_ALLOC_DEVICE:
if (MemoryManager) {
Alloc = MemoryManager->allocate(Size, HostPtr);
@@ -1386,7 +1385,6 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
int Res;
switch (Kind) {
case TARGET_ALLOC_DEFAULT:
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
case TARGET_ALLOC_DEVICE:
if (MemoryManager) {
Res = MemoryManager->free(TgtPtr);
diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index 678be78b56af6..17d69b49b3b7e 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -28,15 +28,15 @@ rpc::Status handleOffloadOpcodes(plugin::GenericDeviceTy &Device,
switch (Port.get_opcode()) {
case LIBC_MALLOC: {
Port.recv_and_send([&](rpc::Buffer *Buffer, uint32_t) {
- Buffer->data[0] = reinterpret_cast<uintptr_t>(Device.allocate(
- Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE_NON_BLOCKING));
+ Buffer->data[0] = reinterpret_cast<uintptr_t>(
+ Device.allocate(Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE));
});
break;
}
case LIBC_FREE: {
Port.recv([&](rpc::Buffer *Buffer, uint32_t) {
Device.free(reinterpret_cast<void *>(Buffer->data[0]),
- TARGET_ALLOC_DEVICE_NON_BLOCKING);
+ TARGET_ALLOC_DEVICE);
});
break;
}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index fdd470fcd8113..b2f840113cff3 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -587,16 +587,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
Res = cuMemAllocManaged(&DevicePtr, Size, CU_MEM_ATTACH_GLOBAL);
MemAlloc = (void *)DevicePtr;
break;
- case TARGET_ALLOC_DEVICE_NON_BLOCKING: {
- CUstream Stream;
- if ((Res = cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING)))
- break;
- if ((Res = cuMemAllocAsync(&DevicePtr, Size, Stream)))
- break;
- cuStreamSynchronize(Stream);
- Res = cuStreamDestroy(Stream);
- MemAlloc = (void *)DevicePtr;
- }
}
if (auto Err =
@@ -627,15 +617,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
case TARGET_ALLOC_HOST:
Res = cuMemFreeHost(TgtPtr);
break;
- case TARGET_ALLOC_DEVICE_NON_BLOCKING: {
- CUstream Stream;
- if ((Res = cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING)))
- break;
- cuMemFreeAsync(reinterpret_cast<CUdeviceptr>(TgtPtr), Stream);
- cuStreamSynchronize(Stream);
- if ((Res = cuStreamDestroy(Stream)))
- break;
- }
}
if (auto Err = Plugin::check(Res, "error in cuMemFree[Host]: %s")) {
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 0db01ca09ab02..44e2584fe53cc 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -250,7 +250,6 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
case TARGET_ALLOC_DEVICE:
case TARGET_ALLOC_HOST:
case TARGET_ALLOC_SHARED:
- case TARGET_ALLOC_DEVICE_NON_BLOCKING:
MemAlloc = std::malloc(Size);
break;
}
More information about the llvm-commits
mailing list