[llvm] [Offload] Implement `olShutDown` (PR #144055)

Ross Brunton via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 27 08:58:49 PDT 2025


https://github.com/RossBrunton updated https://github.com/llvm/llvm-project/pull/144055

>From 364ad9a4c8261e249b925a20c2d78b72fdcfb92c Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 12 Jun 2025 10:47:19 +0100
Subject: [PATCH 1/2] [Offload] Implement `olShutDown`

`olShutDown` was not properly calling deinit on the platforms, resulting
in random segfaults on AMD devices.
---
 offload/liboffload/API/Common.td             |  2 +-
 offload/liboffload/src/OffloadImpl.cpp       | 37 ++++++++++++++++----
 offload/unittests/OffloadAPI/init/olInit.cpp | 12 +++++++
 3 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td
index 79c3bd46f1984..669dfd3cca7c6 100644
--- a/offload/liboffload/API/Common.td
+++ b/offload/liboffload/API/Common.td
@@ -176,7 +176,7 @@ def : Function {
   let desc = "Release the resources in use by Offload";
   let details = [
     "This decrements an internal reference count. When this reaches 0, all resources will be released",
-    "Subsequent API calls made after this are not valid"
+    "Subsequent API calls to methods other than `olInit` made after resources are released will return OL_ERRC_UNINITIALIZED"
   ];
   let params = [];
   let returns = [];
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 6adebb25a2db0..1719c896a90f3 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -97,6 +97,7 @@ struct AllocInfo {
 // Global shared state for liboffload
 struct OffloadContext;
 static OffloadContext *OffloadContextVal;
+std::mutex OffloadContextValMutex;
 struct OffloadContext {
   OffloadContext(OffloadContext &) = delete;
   OffloadContext(OffloadContext &&) = delete;
@@ -107,6 +108,7 @@ struct OffloadContext {
   bool ValidationEnabled = true;
   DenseMap<void *, AllocInfo> AllocInfoMap{};
   SmallVector<ol_platform_impl_t, 4> Platforms{};
+  size_t RefCount;
 
   ol_device_handle_t HostDevice() {
     // The host platform is always inserted last
@@ -191,18 +193,41 @@ Error initPlugins() {
   return Plugin::success();
 }
 
-// TODO: We can properly reference count here and manage the resources in a more
-// clever way
 Error olInit_impl() {
-  static std::once_flag InitFlag;
-  std::optional<Error> InitResult{};
-  std::call_once(InitFlag, [&] { InitResult = initPlugins(); });
+  std::lock_guard<std::mutex> Lock{OffloadContextValMutex};
+
+  std::optional<Error> InitResult;
+  if (!isOffloadInitialized())
+    InitResult = initPlugins();
+
+  OffloadContext::get().RefCount++;
 
   if (InitResult)
     return std::move(*InitResult);
   return Error::success();
 }
-Error olShutDown_impl() { return Error::success(); }
+
+Error olShutDown_impl() {
+  std::lock_guard<std::mutex> Lock{OffloadContextValMutex};
+
+  if (--OffloadContext::get().RefCount != 0)
+    return Error::success();
+
+  llvm::Error Result = Error::success();
+
+  for (auto &P : OffloadContext::get().Platforms) {
+    // Host plugin is nullptr and has no deinit
+    if (!P.Plugin)
+      continue;
+
+    if (auto Res = P.Plugin->deinit())
+      Result = llvm::joinErrors(std::move(Result), std::move(Res));
+  }
+  delete OffloadContextVal;
+  OffloadContextVal = nullptr;
+
+  return Result;
+}
 
 Error olGetPlatformInfoImplDetail(ol_platform_handle_t Platform,
                                   ol_platform_info_t PropName, size_t PropSize,
diff --git a/offload/unittests/OffloadAPI/init/olInit.cpp b/offload/unittests/OffloadAPI/init/olInit.cpp
index 8e27e77cd0fb5..508615152b4f1 100644
--- a/offload/unittests/OffloadAPI/init/olInit.cpp
+++ b/offload/unittests/OffloadAPI/init/olInit.cpp
@@ -15,8 +15,20 @@
 
 struct olInitTest : ::testing::Test {};
 
+TEST_F(olInitTest, Success) {
+  ASSERT_SUCCESS(olInit());
+  ASSERT_SUCCESS(olShutDown());
+}
+
 TEST_F(olInitTest, Uninitialized) {
   ASSERT_ERROR(OL_ERRC_UNINITIALIZED,
                olIterateDevices(
                    [](ol_device_handle_t, void *) { return false; }, nullptr));
 }
+
+TEST_F(olInitTest, RepeatedInit) {
+  for (size_t I = 0; I < 10; I++) {
+    ASSERT_SUCCESS(olInit());
+    ASSERT_SUCCESS(olShutDown());
+  }
+}

>From 42ee27834bc6ddd22e93d3a19fe3f75bb642f63c Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Fri, 27 Jun 2025 16:58:34 +0100
Subject: [PATCH 2/2] Move init to olInit

---
 offload/liboffload/src/OffloadImpl.cpp | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 1719c896a90f3..9cf52171181f4 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -148,19 +148,19 @@ constexpr ol_platform_backend_t pluginNameToBackend(StringRef Name) {
 #include "Shared/Targets.def"
 
 Error initPlugins() {
-  auto *Context = new OffloadContext{};
+  auto &Context = OffloadContext::get();
 
   // Attempt to create an instance of each supported plugin.
 #define PLUGIN_TARGET(Name)                                                    \
   do {                                                                         \
-    Context->Platforms.emplace_back(ol_platform_impl_t{                        \
+    Context.Platforms.emplace_back(ol_platform_impl_t{                         \
         std::unique_ptr<GenericPluginTy>(createPlugin_##Name()),               \
         pluginNameToBackend(#Name)});                                          \
   } while (false);
 #include "Shared/Targets.def"
 
   // Preemptively initialize all devices in the plugin
-  for (auto &Platform : Context->Platforms) {
+  for (auto &Platform : Context.Platforms) {
     // Do not use the host plugin - it isn't supported.
     if (Platform.BackendType == OL_PLATFORM_BACKEND_UNKNOWN)
       continue;
@@ -180,15 +180,13 @@ Error initPlugins() {
   }
 
   // Add the special host device
-  auto &HostPlatform = Context->Platforms.emplace_back(
+  auto &HostPlatform = Context.Platforms.emplace_back(
       ol_platform_impl_t{nullptr, OL_PLATFORM_BACKEND_HOST});
   HostPlatform.Devices.emplace_back(-1, nullptr, nullptr, InfoTreeNode{});
-  Context->HostDevice()->Platform = &HostPlatform;
+  Context.HostDevice()->Platform = &HostPlatform;
 
-  Context->TracingEnabled = std::getenv("OFFLOAD_TRACE");
-  Context->ValidationEnabled = !std::getenv("OFFLOAD_DISABLE_VALIDATION");
-
-  OffloadContextVal = Context;
+  Context.TracingEnabled = std::getenv("OFFLOAD_TRACE");
+  Context.ValidationEnabled = !std::getenv("OFFLOAD_DISABLE_VALIDATION");
 
   return Plugin::success();
 }
@@ -197,8 +195,10 @@ Error olInit_impl() {
   std::lock_guard<std::mutex> Lock{OffloadContextValMutex};
 
   std::optional<Error> InitResult;
-  if (!isOffloadInitialized())
+  if (!isOffloadInitialized()) {
+    OffloadContextVal = new OffloadContext{};
     InitResult = initPlugins();
+  }
 
   OffloadContext::get().RefCount++;
 



More information about the llvm-commits mailing list