[llvm] [Offload] Add an `unloadBinary` interface to PluginInterface (PR #143873)
Ross Brunton via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 19 02:58:25 PDT 2025
https://github.com/RossBrunton updated https://github.com/llvm/llvm-project/pull/143873
>From d146a9a946c66d4d717bd25ee2705db374a4217e Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 12 Jun 2025 12:11:09 +0100
Subject: [PATCH 1/3] [Offload] Add an `unloadBinary` interface to
PluginInterface
This allows removal of a specific Image from a Device, rather than
requiring all image data to outlive the device they were created for.
This is required for `ol_program_handle_t`s, which now specify the
lifetime of the buffer used to create the program.
---
offload/liboffload/API/Program.td | 4 +-
offload/liboffload/src/OffloadImpl.cpp | 3 +
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 23 +++---
.../common/include/PluginInterface.h | 4 +
.../common/src/PluginInterface.cpp | 75 ++++++++++---------
offload/plugins-nextgen/cuda/src/rtl.cpp | 29 +++----
6 files changed, 76 insertions(+), 62 deletions(-)
diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
index 8c88fe6e21e6a..8ad85bec17b03 100644
--- a/offload/liboffload/API/Program.td
+++ b/offload/liboffload/API/Program.td
@@ -13,7 +13,9 @@
def : Function {
let name = "olCreateProgram";
let desc = "Create a program for the device from the binary image pointed to by `ProgData`.";
- let details = [];
+ let details = [
+ "`ProgData` must remain valid for the entire lifetime of the output `Program` handle",
+ ];
let params = [
Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
Param<"const void*", "ProgData", "pointer to the program binary data", PARAM_IN>,
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 770c212d804d2..bbc9f927ae27c 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -458,6 +458,9 @@ Error olCreateProgram_impl(ol_device_handle_t Device, const void *ProgData,
}
Error olDestroyProgram_impl(ol_program_handle_t Program) {
+ if (auto Err = Program->Image->getDevice().unloadBinary(Program->Image))
+ return Err;
+
return olDestroy(Program);
}
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 73e1e66928fac..c851165acc3bc 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2023,6 +2023,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Plugin::success();
}
+ Error unloadBinaryImpl(DeviceImageTy *Image) override {
+ AMDGPUDeviceImageTy &AMDImage = static_cast<AMDGPUDeviceImageTy &>(*Image);
+
+ // Unload the executable of the image.
+ if (auto Err = AMDImage.unloadExecutable())
+ return Err;
+
+ return Plugin::success();
+ }
+
/// Deinitialize the device and release its resources.
Error deinitImpl() override {
// Deinitialize the stream and event pools.
@@ -2035,19 +2045,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (auto Err = AMDGPUSignalManager.deinit())
return Err;
- // Close modules if necessary.
- if (!LoadedImages.empty()) {
- // Each image has its own module.
- for (DeviceImageTy *Image : LoadedImages) {
- AMDGPUDeviceImageTy &AMDImage =
- static_cast<AMDGPUDeviceImageTy &>(*Image);
-
- // Unload the executable of the image.
- if (auto Err = AMDImage.unloadExecutable())
- return Err;
- }
- }
-
// Invalidate agent reference.
Agent = {0};
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index f5d995532b7a5..67e66ef095e56 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -735,6 +735,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
virtual Expected<DeviceImageTy *>
loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0;
+ /// Unload a previously loaded Image from the device
+ Error unloadBinary(DeviceImageTy *Image);
+ virtual Error unloadBinaryImpl(DeviceImageTy *Image) = 0;
+
/// Setup the device environment if needed. Notice this setup may not be run
/// on some plugins. By default, it will be executed, but plugins can change
/// this behavior by overriding the shouldSetupDeviceEnvironment function.
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 6fd3405d03afa..038fd7aeb75ee 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -821,26 +821,52 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
return Plugin::success();
}
-Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
- for (DeviceImageTy *Image : LoadedImages)
- if (auto Err = callGlobalDestructors(Plugin, *Image))
- return Err;
+Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) {
+ if (auto Err = callGlobalDestructors(Plugin, *Image))
+ return Err;
if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
- for (auto *Image : LoadedImages) {
- DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
- GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
- sizeof(DeviceMemoryPoolTrackingTy),
- &ImageDeviceMemoryPoolTracking);
- if (auto Err =
- GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) {
- consumeError(std::move(Err));
- continue;
- }
- DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
+ DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
+ GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
+ sizeof(DeviceMemoryPoolTrackingTy),
+ &ImageDeviceMemoryPoolTracking);
+ if (auto Err =
+ GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) {
+ consumeError(std::move(Err));
}
+ DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
+ }
+
+ GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
+ auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
+ if (!ProfOrErr)
+ return ProfOrErr.takeError();
+
+ if (!ProfOrErr->empty()) {
+ // Dump out profdata
+ if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
+ uint32_t(DeviceDebugKind::PGODump))
+ ProfOrErr->dump();
+
+ // Write data to profiling file
+ if (auto Err = ProfOrErr->write())
+ return Err;
+ }
+
+ LoadedImages.erase(
+ std::find(LoadedImages.begin(), LoadedImages.end(), Image));
+ return unloadBinaryImpl(Image);
+}
+
+Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
+ while (!LoadedImages.empty()) {
+ if (auto Err = unloadBinary(LoadedImages.back()))
+ return Err;
+ }
+
+ if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
// TODO: Write this by default into a file.
printf("\n\n|-----------------------\n"
"| Device memory tracker:\n"
@@ -856,25 +882,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
DeviceMemoryPoolTracking.AllocationMax);
}
- for (auto *Image : LoadedImages) {
- GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
- auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
- if (!ProfOrErr)
- return ProfOrErr.takeError();
-
- if (ProfOrErr->empty())
- continue;
-
- // Dump out profdata
- if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
- uint32_t(DeviceDebugKind::PGODump))
- ProfOrErr->dump();
-
- // Write data to profiling file
- if (auto Err = ProfOrErr->write())
- return Err;
- }
-
// Delete the memory manager before deinitializing the device. Otherwise,
// we may delete device allocations after the device is deinitialized.
if (MemoryManager)
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 9943f533ef5a8..2b791111cc1ce 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -358,6 +358,21 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::success();
}
+ Error unloadBinaryImpl(DeviceImageTy *Image) override {
+ assert(Context && "Invalid CUDA context");
+
+ // Each image has its own module.
+ for (DeviceImageTy *Image : LoadedImages) {
+ CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
+
+ // Unload the module of the image.
+ if (auto Err = CUDAImage.unloadModule())
+ return Err;
+ }
+
+ return Plugin::success();
+ }
+
/// Deinitialize the device and release its resources.
Error deinitImpl() override {
if (Context) {
@@ -372,20 +387,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (auto Err = CUDAEventManager.deinit())
return Err;
- // Close modules if necessary.
- if (!LoadedImages.empty()) {
- assert(Context && "Invalid CUDA context");
-
- // Each image has its own module.
- for (DeviceImageTy *Image : LoadedImages) {
- CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
-
- // Unload the module of the image.
- if (auto Err = CUDAImage.unloadModule())
- return Err;
- }
- }
-
if (Context) {
CUresult Res = cuDevicePrimaryCtxRelease(Device);
if (auto Err =
>From 0c30b9e2961703c1144083a3b8b6a185e05d12e5 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 12 Jun 2025 12:25:23 +0100
Subject: [PATCH 2/3] Fix cuda
---
offload/plugins-nextgen/cuda/src/rtl.cpp | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 2b791111cc1ce..0e662b038c363 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -362,13 +362,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
assert(Context && "Invalid CUDA context");
// Each image has its own module.
- for (DeviceImageTy *Image : LoadedImages) {
- CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
+ CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
- // Unload the module of the image.
- if (auto Err = CUDAImage.unloadModule())
- return Err;
- }
+ // Unload the module of the image.
+ if (auto Err = CUDAImage.unloadModule())
+ return Err;
return Plugin::success();
}
>From 9afacf3d30260e8eaf793d93922f81431cf6eff9 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 12 Jun 2025 15:51:47 +0100
Subject: [PATCH 3/3] Review feedback
---
offload/liboffload/API/Program.td | 2 +-
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 5 +----
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
index 8ad85bec17b03..0476fa1f7c27a 100644
--- a/offload/liboffload/API/Program.td
+++ b/offload/liboffload/API/Program.td
@@ -14,7 +14,7 @@ def : Function {
let name = "olCreateProgram";
let desc = "Create a program for the device from the binary image pointed to by `ProgData`.";
let details = [
- "`ProgData` must remain valid for the entire lifetime of the output `Program` handle",
+ "The provided `ProgData` will be copied and need not outlive the returned handle",
];
let params = [
Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index c851165acc3bc..bc1a768feafdd 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2027,10 +2027,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
AMDGPUDeviceImageTy &AMDImage = static_cast<AMDGPUDeviceImageTy &>(*Image);
// Unload the executable of the image.
- if (auto Err = AMDImage.unloadExecutable())
- return Err;
-
- return Plugin::success();
+ return AMDImage.unloadExecutable();
}
/// Deinitialize the device and release its resources.
More information about the llvm-commits
mailing list