[llvm] [Offload] Implement `olShutDown` (PR #144055)
Ross Brunton via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 27 08:58:49 PDT 2025
https://github.com/RossBrunton updated https://github.com/llvm/llvm-project/pull/144055
>From 364ad9a4c8261e249b925a20c2d78b72fdcfb92c Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 12 Jun 2025 10:47:19 +0100
Subject: [PATCH 1/2] [Offload] Implement `olShutDown`
`olShutDown` was not properly calling deinit on the platforms, resulting
in random segfaults on AMD devices.
---
offload/liboffload/API/Common.td | 2 +-
offload/liboffload/src/OffloadImpl.cpp | 37 ++++++++++++++++----
offload/unittests/OffloadAPI/init/olInit.cpp | 12 +++++++
3 files changed, 44 insertions(+), 7 deletions(-)
diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td
index 79c3bd46f1984..669dfd3cca7c6 100644
--- a/offload/liboffload/API/Common.td
+++ b/offload/liboffload/API/Common.td
@@ -176,7 +176,7 @@ def : Function {
let desc = "Release the resources in use by Offload";
let details = [
"This decrements an internal reference count. When this reaches 0, all resources will be released",
- "Subsequent API calls made after this are not valid"
+ "Subsequent API calls to methods other than `olInit` made after resources are released will return OL_ERRC_UNINITIALIZED"
];
let params = [];
let returns = [];
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 6adebb25a2db0..1719c896a90f3 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -97,6 +97,7 @@ struct AllocInfo {
// Global shared state for liboffload
struct OffloadContext;
static OffloadContext *OffloadContextVal;
+std::mutex OffloadContextValMutex;
struct OffloadContext {
OffloadContext(OffloadContext &) = delete;
OffloadContext(OffloadContext &&) = delete;
@@ -107,6 +108,7 @@ struct OffloadContext {
bool ValidationEnabled = true;
DenseMap<void *, AllocInfo> AllocInfoMap{};
SmallVector<ol_platform_impl_t, 4> Platforms{};
+ size_t RefCount;
ol_device_handle_t HostDevice() {
// The host platform is always inserted last
@@ -191,18 +193,41 @@ Error initPlugins() {
return Plugin::success();
}
-// TODO: We can properly reference count here and manage the resources in a more
-// clever way
Error olInit_impl() {
- static std::once_flag InitFlag;
- std::optional<Error> InitResult{};
- std::call_once(InitFlag, [&] { InitResult = initPlugins(); });
+ std::lock_guard<std::mutex> Lock{OffloadContextValMutex};
+
+ std::optional<Error> InitResult;
+ if (!isOffloadInitialized())
+ InitResult = initPlugins();
+
+ OffloadContext::get().RefCount++;
if (InitResult)
return std::move(*InitResult);
return Error::success();
}
-Error olShutDown_impl() { return Error::success(); }
+
+Error olShutDown_impl() {
+ std::lock_guard<std::mutex> Lock{OffloadContextValMutex};
+
+ if (--OffloadContext::get().RefCount != 0)
+ return Error::success();
+
+ llvm::Error Result = Error::success();
+
+ for (auto &P : OffloadContext::get().Platforms) {
+ // Host plugin is nullptr and has no deinit
+ if (!P.Plugin)
+ continue;
+
+ if (auto Res = P.Plugin->deinit())
+ Result = llvm::joinErrors(std::move(Result), std::move(Res));
+ }
+ delete OffloadContextVal;
+ OffloadContextVal = nullptr;
+
+ return Result;
+}
Error olGetPlatformInfoImplDetail(ol_platform_handle_t Platform,
ol_platform_info_t PropName, size_t PropSize,
diff --git a/offload/unittests/OffloadAPI/init/olInit.cpp b/offload/unittests/OffloadAPI/init/olInit.cpp
index 8e27e77cd0fb5..508615152b4f1 100644
--- a/offload/unittests/OffloadAPI/init/olInit.cpp
+++ b/offload/unittests/OffloadAPI/init/olInit.cpp
@@ -15,8 +15,20 @@
struct olInitTest : ::testing::Test {};
+TEST_F(olInitTest, Success) {
+ ASSERT_SUCCESS(olInit());
+ ASSERT_SUCCESS(olShutDown());
+}
+
TEST_F(olInitTest, Uninitialized) {
ASSERT_ERROR(OL_ERRC_UNINITIALIZED,
olIterateDevices(
[](ol_device_handle_t, void *) { return false; }, nullptr));
}
+
+TEST_F(olInitTest, RepeatedInit) {
+ for (size_t I = 0; I < 10; I++) {
+ ASSERT_SUCCESS(olInit());
+ ASSERT_SUCCESS(olShutDown());
+ }
+}
>From 42ee27834bc6ddd22e93d3a19fe3f75bb642f63c Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Fri, 27 Jun 2025 16:58:34 +0100
Subject: [PATCH 2/2] Move init to olInit
---
offload/liboffload/src/OffloadImpl.cpp | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 1719c896a90f3..9cf52171181f4 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -148,19 +148,19 @@ constexpr ol_platform_backend_t pluginNameToBackend(StringRef Name) {
#include "Shared/Targets.def"
Error initPlugins() {
- auto *Context = new OffloadContext{};
+ auto &Context = OffloadContext::get();
// Attempt to create an instance of each supported plugin.
#define PLUGIN_TARGET(Name) \
do { \
- Context->Platforms.emplace_back(ol_platform_impl_t{ \
+ Context.Platforms.emplace_back(ol_platform_impl_t{ \
std::unique_ptr<GenericPluginTy>(createPlugin_##Name()), \
pluginNameToBackend(#Name)}); \
} while (false);
#include "Shared/Targets.def"
// Preemptively initialize all devices in the plugin
- for (auto &Platform : Context->Platforms) {
+ for (auto &Platform : Context.Platforms) {
// Do not use the host plugin - it isn't supported.
if (Platform.BackendType == OL_PLATFORM_BACKEND_UNKNOWN)
continue;
@@ -180,15 +180,13 @@ Error initPlugins() {
}
// Add the special host device
- auto &HostPlatform = Context->Platforms.emplace_back(
+ auto &HostPlatform = Context.Platforms.emplace_back(
ol_platform_impl_t{nullptr, OL_PLATFORM_BACKEND_HOST});
HostPlatform.Devices.emplace_back(-1, nullptr, nullptr, InfoTreeNode{});
- Context->HostDevice()->Platform = &HostPlatform;
+ Context.HostDevice()->Platform = &HostPlatform;
- Context->TracingEnabled = std::getenv("OFFLOAD_TRACE");
- Context->ValidationEnabled = !std::getenv("OFFLOAD_DISABLE_VALIDATION");
-
- OffloadContextVal = Context;
+ Context.TracingEnabled = std::getenv("OFFLOAD_TRACE");
+ Context.ValidationEnabled = !std::getenv("OFFLOAD_DISABLE_VALIDATION");
return Plugin::success();
}
@@ -197,8 +195,10 @@ Error olInit_impl() {
std::lock_guard<std::mutex> Lock{OffloadContextValMutex};
std::optional<Error> InitResult;
- if (!isOffloadInitialized())
+ if (!isOffloadInitialized()) {
+ OffloadContextVal = new OffloadContext{};
InitResult = initPlugins();
+ }
OffloadContext::get().RefCount++;
More information about the llvm-commits
mailing list