[llvm] [Offload] Add an `unloadBinary` interface to PluginInterface (PR #143873)

Ross Brunton via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 19 02:58:25 PDT 2025


https://github.com/RossBrunton updated https://github.com/llvm/llvm-project/pull/143873

>From d146a9a946c66d4d717bd25ee2705db374a4217e Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 12 Jun 2025 12:11:09 +0100
Subject: [PATCH 1/3] [Offload] Add an `unloadBinary` interface to
 PluginInterface

This allows removal of a specific Image from a Device, rather than
requiring all image data to outlive the device they were created for.

This is required for `ol_program_handle_t`s, which now specify the
lifetime of the buffer used to create the program.
---
 offload/liboffload/API/Program.td             |  4 +-
 offload/liboffload/src/OffloadImpl.cpp        |  3 +
 offload/plugins-nextgen/amdgpu/src/rtl.cpp    | 23 +++---
 .../common/include/PluginInterface.h          |  4 +
 .../common/src/PluginInterface.cpp            | 75 ++++++++++---------
 offload/plugins-nextgen/cuda/src/rtl.cpp      | 29 +++----
 6 files changed, 76 insertions(+), 62 deletions(-)

diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
index 8c88fe6e21e6a..8ad85bec17b03 100644
--- a/offload/liboffload/API/Program.td
+++ b/offload/liboffload/API/Program.td
@@ -13,7 +13,9 @@
 def : Function {
     let name = "olCreateProgram";
     let desc = "Create a program for the device from the binary image pointed to by `ProgData`.";
-    let details = [];
+    let details = [
+        "`ProgData` must remain valid for the entire lifetime of the output `Program` handle",
+    ];
     let params = [
         Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
         Param<"const void*", "ProgData", "pointer to the program binary data", PARAM_IN>,
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 770c212d804d2..bbc9f927ae27c 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -458,6 +458,9 @@ Error olCreateProgram_impl(ol_device_handle_t Device, const void *ProgData,
 }
 
 Error olDestroyProgram_impl(ol_program_handle_t Program) {
+  if (auto Err = Program->Image->getDevice().unloadBinary(Program->Image))
+    return Err;
+
   return olDestroy(Program);
 }
 
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 73e1e66928fac..c851165acc3bc 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2023,6 +2023,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     return Plugin::success();
   }
 
+  Error unloadBinaryImpl(DeviceImageTy *Image) override {
+    AMDGPUDeviceImageTy &AMDImage = static_cast<AMDGPUDeviceImageTy &>(*Image);
+
+    // Unload the executable of the image.
+    if (auto Err = AMDImage.unloadExecutable())
+      return Err;
+
+    return Plugin::success();
+  }
+
   /// Deinitialize the device and release its resources.
   Error deinitImpl() override {
     // Deinitialize the stream and event pools.
@@ -2035,19 +2045,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     if (auto Err = AMDGPUSignalManager.deinit())
       return Err;
 
-    // Close modules if necessary.
-    if (!LoadedImages.empty()) {
-      // Each image has its own module.
-      for (DeviceImageTy *Image : LoadedImages) {
-        AMDGPUDeviceImageTy &AMDImage =
-            static_cast<AMDGPUDeviceImageTy &>(*Image);
-
-        // Unload the executable of the image.
-        if (auto Err = AMDImage.unloadExecutable())
-          return Err;
-      }
-    }
-
     // Invalidate agent reference.
     Agent = {0};
 
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index f5d995532b7a5..67e66ef095e56 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -735,6 +735,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   virtual Expected<DeviceImageTy *>
   loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0;
 
+  /// Unload a previously loaded Image from the device
+  Error unloadBinary(DeviceImageTy *Image);
+  virtual Error unloadBinaryImpl(DeviceImageTy *Image) = 0;
+
   /// Setup the device environment if needed. Notice this setup may not be run
   /// on some plugins. By default, it will be executed, but plugins can change
   /// this behavior by overriding the shouldSetupDeviceEnvironment function.
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 6fd3405d03afa..038fd7aeb75ee 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -821,26 +821,52 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
   return Plugin::success();
 }
 
-Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
-  for (DeviceImageTy *Image : LoadedImages)
-    if (auto Err = callGlobalDestructors(Plugin, *Image))
-      return Err;
+Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) {
+  if (auto Err = callGlobalDestructors(Plugin, *Image))
+    return Err;
 
   if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
     GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
-    for (auto *Image : LoadedImages) {
-      DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
-      GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
-                             sizeof(DeviceMemoryPoolTrackingTy),
-                             &ImageDeviceMemoryPoolTracking);
-      if (auto Err =
-              GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) {
-        consumeError(std::move(Err));
-        continue;
-      }
-      DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
+    DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
+    GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
+                           sizeof(DeviceMemoryPoolTrackingTy),
+                           &ImageDeviceMemoryPoolTracking);
+    if (auto Err =
+            GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) {
+      consumeError(std::move(Err));
     }
+    DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
+  }
+
+  GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
+  auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
+  if (!ProfOrErr)
+    return ProfOrErr.takeError();
+
+  if (!ProfOrErr->empty()) {
+    // Dump out profdata
+    if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
+        uint32_t(DeviceDebugKind::PGODump))
+      ProfOrErr->dump();
+
+    // Write data to profiling file
+    if (auto Err = ProfOrErr->write())
+      return Err;
+  }
+
+  LoadedImages.erase(
+      std::find(LoadedImages.begin(), LoadedImages.end(), Image));
 
+  return unloadBinaryImpl(Image);
+}
+
+Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
+  while (!LoadedImages.empty()) {
+    if (auto Err = unloadBinary(LoadedImages.back()))
+      return Err;
+  }
+
+  if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
     // TODO: Write this by default into a file.
     printf("\n\n|-----------------------\n"
            "| Device memory tracker:\n"
@@ -856,25 +882,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
            DeviceMemoryPoolTracking.AllocationMax);
   }
 
-  for (auto *Image : LoadedImages) {
-    GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
-    auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
-    if (!ProfOrErr)
-      return ProfOrErr.takeError();
-
-    if (ProfOrErr->empty())
-      continue;
-
-    // Dump out profdata
-    if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
-        uint32_t(DeviceDebugKind::PGODump))
-      ProfOrErr->dump();
-
-    // Write data to profiling file
-    if (auto Err = ProfOrErr->write())
-      return Err;
-  }
-
   // Delete the memory manager before deinitializing the device. Otherwise,
   // we may delete device allocations after the device is deinitialized.
   if (MemoryManager)
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 9943f533ef5a8..2b791111cc1ce 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -358,6 +358,21 @@ struct CUDADeviceTy : public GenericDeviceTy {
     return Plugin::success();
   }
 
+  Error unloadBinaryImpl(DeviceImageTy *Image) override {
+    assert(Context && "Invalid CUDA context");
+
+    // Each image has its own module.
+    for (DeviceImageTy *Image : LoadedImages) {
+      CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
+
+      // Unload the module of the image.
+      if (auto Err = CUDAImage.unloadModule())
+        return Err;
+    }
+
+    return Plugin::success();
+  }
+
   /// Deinitialize the device and release its resources.
   Error deinitImpl() override {
     if (Context) {
@@ -372,20 +387,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
     if (auto Err = CUDAEventManager.deinit())
       return Err;
 
-    // Close modules if necessary.
-    if (!LoadedImages.empty()) {
-      assert(Context && "Invalid CUDA context");
-
-      // Each image has its own module.
-      for (DeviceImageTy *Image : LoadedImages) {
-        CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
-
-        // Unload the module of the image.
-        if (auto Err = CUDAImage.unloadModule())
-          return Err;
-      }
-    }
-
     if (Context) {
       CUresult Res = cuDevicePrimaryCtxRelease(Device);
       if (auto Err =

>From 0c30b9e2961703c1144083a3b8b6a185e05d12e5 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 12 Jun 2025 12:25:23 +0100
Subject: [PATCH 2/3] Fix cuda

---
 offload/plugins-nextgen/cuda/src/rtl.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 2b791111cc1ce..0e662b038c363 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -362,13 +362,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
     assert(Context && "Invalid CUDA context");
 
     // Each image has its own module.
-    for (DeviceImageTy *Image : LoadedImages) {
-      CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
+    CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
 
-      // Unload the module of the image.
-      if (auto Err = CUDAImage.unloadModule())
-        return Err;
-    }
+    // Unload the module of the image.
+    if (auto Err = CUDAImage.unloadModule())
+      return Err;
 
     return Plugin::success();
   }

>From 9afacf3d30260e8eaf793d93922f81431cf6eff9 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 12 Jun 2025 15:51:47 +0100
Subject: [PATCH 3/3] Review feedback

---
 offload/liboffload/API/Program.td          | 2 +-
 offload/plugins-nextgen/amdgpu/src/rtl.cpp | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
index 8ad85bec17b03..0476fa1f7c27a 100644
--- a/offload/liboffload/API/Program.td
+++ b/offload/liboffload/API/Program.td
@@ -14,7 +14,7 @@ def : Function {
     let name = "olCreateProgram";
     let desc = "Create a program for the device from the binary image pointed to by `ProgData`.";
     let details = [
-        "`ProgData` must remain valid for the entire lifetime of the output `Program` handle",
+        "The provided `ProgData` will be copied and need not outlive the returned handle",
     ];
     let params = [
         Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index c851165acc3bc..bc1a768feafdd 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2027,10 +2027,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     AMDGPUDeviceImageTy &AMDImage = static_cast<AMDGPUDeviceImageTy &>(*Image);
 
     // Unload the executable of the image.
-    if (auto Err = AMDImage.unloadExecutable())
-      return Err;
-
-    return Plugin::success();
+    return AMDImage.unloadExecutable();
   }
 
   /// Deinitialize the device and release its resources.



More information about the llvm-commits mailing list