[llvm] [Offload] `olLaunchHostFunction` (PR #152482)
Ross Brunton via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 14 05:32:04 PDT 2025
https://github.com/RossBrunton updated https://github.com/llvm/llvm-project/pull/152482
>From d1b750eb380bc6489e2d690e25742956e108e5e5 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 7 Aug 2025 12:54:52 +0100
Subject: [PATCH 1/6] [Offload] `olEnqueueHostCallback`
Add an `olEnqueueHostCallback` method that allows enqueueing host work
to the stream.
---
offload/liboffload/API/APIDefs.td | 10 +++-
offload/liboffload/API/Queue.td | 26 ++++++++++
offload/liboffload/src/OffloadImpl.cpp | 7 +++
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 48 +++++++++++++++++++
.../common/include/PluginInterface.h | 7 +++
.../common/src/PluginInterface.cpp | 10 ++++
offload/plugins-nextgen/cuda/src/rtl.cpp | 13 +++++
offload/plugins-nextgen/host/src/rtl.cpp | 6 +++
offload/unittests/OffloadAPI/CMakeLists.txt | 3 +-
.../queue/olEnqueueHostCallback.cpp | 48 +++++++++++++++++++
10 files changed, 176 insertions(+), 2 deletions(-)
create mode 100644 offload/unittests/OffloadAPI/queue/olEnqueueHostCallback.cpp
diff --git a/offload/liboffload/API/APIDefs.td b/offload/liboffload/API/APIDefs.td
index 640932dcf8464..bd4cbbaa546b2 100644
--- a/offload/liboffload/API/APIDefs.td
+++ b/offload/liboffload/API/APIDefs.td
@@ -31,6 +31,13 @@ class IsHandleType<string Type> {
!ne(!find(Type, "_handle_t", !sub(!size(Type), 9)), -1));
}
+// Does the type end with '_cb_t'?
+class IsCallbackType<string Type> {
+ // size("_cb_t") == 5
+ bit ret = !if(!lt(!size(Type), 5), 0,
+ !ne(!find(Type, "_cb_t", !sub(!size(Type), 5)), -1));
+}
+
// Does the type end with '*'?
class IsPointerType<string Type> {
bit ret = !ne(!find(Type, "*", !sub(!size(Type), 1)), -1);
@@ -58,6 +65,7 @@ class Param<string Type, string Name, string Desc, bits<3> Flags = 0> {
TypeInfo type_info = TypeInfo<"", "">;
bit IsHandle = IsHandleType<type>.ret;
bit IsPointer = IsPointerType<type>.ret;
+ bit IsCallback = IsCallbackType<type>.ret;
}
// A parameter whose range is described by other parameters in the function.
@@ -81,7 +89,7 @@ class ShouldCheckHandle<Param P> {
}
class ShouldCheckPointer<Param P> {
- bit ret = !and(P.IsPointer, !eq(!and(PARAM_OPTIONAL, P.flags), 0));
+ bit ret = !and(!or(P.IsPointer, P.IsCallback), !eq(!and(PARAM_OPTIONAL, P.flags), 0));
}
// For a list of returns that contains a specific return code, find and append
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index 1d9f6f2d11c9b..5fd202bba8496 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -108,3 +108,29 @@ def : Function {
Return<"OL_ERRC_INVALID_QUEUE">
];
}
+
+def : FptrTypedef {
+ let name = "ol_queue_callback_cb_t";
+ let desc = "Callback function for use by `olEnqueueHostCallback`.";
+ let params = [
+ Param<"void *", "UserData", "user specified data passed into `olEnqueueHostCallback`.", PARAM_IN>,
+ ];
+ let return = "void";
+}
+
+def : Function {
+ let name = "olEnqueueHostCallback";
+ let desc = "Enqueue a callback function on the host.";
+ let details = [
+ "The provided function will be called from the same process as the one that called `olEnqueueHostCallback`.",
+ "The callback will not run until all previous work submitted to the queue has completed.",
+ "The callback must return before any work submitted to the queue after it is started.",
+ "The callback must not call any liboffload API functions or any backend specific functions (such as Cuda or HSA library functions).",
+ ];
+ let params = [
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+ Param<"ol_queue_callback_cb_t", "Callback", "the callback function to call on the host", PARAM_IN>,
+ Param<"void *", "UserData", "a pointer that will be passed verbatim to the callback function", PARAM_IN_OPTIONAL>,
+ ];
+ let returns = [];
+}
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index f5365ca274308..0b72f51730091 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -833,5 +833,12 @@ Error olGetSymbolInfoSize_impl(ol_symbol_handle_t Symbol,
return olGetSymbolInfoImplDetail(Symbol, PropName, 0, nullptr, PropSizeRet);
}
+Error olEnqueueHostCallback_impl(ol_queue_handle_t Queue,
+ ol_queue_callback_cb_t Callback,
+ void *UserData) {
+ return Queue->Device->Device->enqueueHostCallback(Callback, UserData,
+ Queue->AsyncInfo);
+}
+
} // namespace offload
} // namespace llvm
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 796182075ff3d..c2f583dab51ce 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1063,6 +1063,20 @@ struct AMDGPUStreamTy {
/// Indicate to spread data transfers across all available SDMAs
bool UseMultipleSdmaEngines;
+ /// Wrapper function for implementing host callbacks
+ static void CallbackWrapper(AMDGPUSignalTy *InputSignal,
+ AMDGPUSignalTy *OutputSignal,
+ void (*Callback)(void *), void *UserData) {
+ if (InputSignal)
+ if (auto Err = InputSignal->wait())
+ // Wait shouldn't report an error
+ reportFatalInternalError(std::move(Err));
+
+ Callback(UserData);
+
+ OutputSignal->signal();
+ }
+
/// Return the current number of asynchronous operations on the stream.
uint32_t size() const { return NextSlot; }
@@ -1495,6 +1509,31 @@ struct AMDGPUStreamTy {
OutputSignal->get());
}
+ Error pushHostCallback(void (*Callback)(void *), void *UserData) {
+ // Retrieve an available signal for the operation's output.
+ AMDGPUSignalTy *OutputSignal = nullptr;
+ if (auto Err = SignalManager.getResource(OutputSignal))
+ return Err;
+ OutputSignal->reset();
+ OutputSignal->increaseUseCount();
+
+ AMDGPUSignalTy *InputSignal;
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+
+ // Consume stream slot and compute dependencies.
+ InputSignal = consume(OutputSignal).second;
+ }
+
+ // "Leaking" the thread here is consistent with other work added to the
+ // queue. The input and output signals will remain valid until the output is
+ // signaled.
+ std::thread(CallbackWrapper, InputSignal, OutputSignal, Callback, UserData)
+ .detach();
+
+ return Plugin::success();
+ }
+
/// Synchronize with the stream. The current thread waits until all operations
/// are finalized and it performs the pending post actions (i.e., releasing
/// intermediate buffers).
@@ -2553,6 +2592,15 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Plugin::success();
}
+ Error enqueueHostCallbackImpl(void (*Callback)(void *), void *UserData,
+ AsyncInfoWrapperTy &AsyncInfo) override {
+ AMDGPUStreamTy *Stream = nullptr;
+ if (auto Err = getStream(AsyncInfo, Stream))
+ return Err;
+
+ return Stream->pushHostCallback(Callback, UserData);
+ };
+
/// Create an event.
Error createEventImpl(void **EventPtrStorage) override {
AMDGPUEventTy **Event = reinterpret_cast<AMDGPUEventTy **>(EventPtrStorage);
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index c9ab34b024b77..28202ac3ce100 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -965,6 +965,13 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Error initDeviceInfo(__tgt_device_info *DeviceInfo);
virtual Error initDeviceInfoImpl(__tgt_device_info *DeviceInfo) = 0;
+ /// Enqueue a host call to AsyncInfo
+ Error enqueueHostCallback(void (*Callback)(void *), void *UserData,
+ __tgt_async_info *AsyncInfo);
+ virtual Error enqueueHostCallbackImpl(void (*Callback)(void *),
+ void *UserData,
+ AsyncInfoWrapperTy &AsyncInfo) = 0;
+
/// Create an event.
Error createEvent(void **EventPtrStorage);
virtual Error createEventImpl(void **EventPtrStorage) = 0;
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 083d41659a469..5c088a097a2a5 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1589,6 +1589,16 @@ Error GenericDeviceTy::initAsyncInfo(__tgt_async_info **AsyncInfoPtr) {
return Err;
}
+Error GenericDeviceTy::enqueueHostCallback(void (*Callback)(void *),
+ void *UserData,
+ __tgt_async_info *AsyncInfo) {
+ AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
+
+ auto Err = enqueueHostCallbackImpl(Callback, UserData, AsyncInfoWrapper);
+ AsyncInfoWrapper.finalize(Err);
+ return Err;
+}
+
Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
assert(DeviceInfo && "Invalid device info");
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index e94f3f6af7dd4..c2dca1386a548 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -873,6 +873,19 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::success();
}
+ Error enqueueHostCallbackImpl(void (*Callback)(void *), void *UserData,
+ AsyncInfoWrapperTy &AsyncInfo) override {
+ if (auto Err = setContext())
+ return Err;
+
+ CUstream Stream;
+ if (auto Err = getStream(AsyncInfo, Stream))
+ return Err;
+
+ CUresult Res = cuLaunchHostFunc(Stream, Callback, UserData);
+ return Plugin::check(Res, "error in cuLaunchHostFunc: %s");
+ };
+
/// Create an event.
Error createEventImpl(void **EventPtrStorage) override {
CUevent *Event = reinterpret_cast<CUevent *>(EventPtrStorage);
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index ed5213531999d..94d191e5fcd71 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -320,6 +320,12 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
"initDeviceInfoImpl not supported");
}
+ Error enqueueHostCallbackImpl(void (*Callback)(void *), void *UserData,
+ AsyncInfoWrapperTy &AsyncInfo) override {
+ Callback(UserData);
+ return Plugin::success();
+ };
+
/// This plugin does not support the event API. Do nothing without failing.
Error createEventImpl(void **EventPtrStorage) override {
*EventPtrStorage = nullptr;
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index 8f0267eb39bdf..f3f1b4db2656a 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -41,7 +41,8 @@ add_offload_unittest("queue"
queue/olDestroyQueue.cpp
queue/olGetQueueInfo.cpp
queue/olGetQueueInfoSize.cpp
- queue/olWaitEvents.cpp)
+ queue/olWaitEvents.cpp
+ queue/olEnqueueHostCallback.cpp)
add_offload_unittest("symbol"
symbol/olGetSymbol.cpp
diff --git a/offload/unittests/OffloadAPI/queue/olEnqueueHostCallback.cpp b/offload/unittests/OffloadAPI/queue/olEnqueueHostCallback.cpp
new file mode 100644
index 0000000000000..27dbfe2f111ce
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olEnqueueHostCallback.cpp
@@ -0,0 +1,48 @@
+//===------- Offload API tests - olEnqueueHostCallback --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+struct olEnqueueHostCallbackTest : OffloadQueueTest {};
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olEnqueueHostCallbackTest);
+
+TEST_P(olEnqueueHostCallbackTest, Success) {
+ ASSERT_SUCCESS(olEnqueueHostCallback(Queue, [](void *) {}, nullptr));
+}
+
+TEST_P(olEnqueueHostCallbackTest, SuccessSequence) {
+ uint32_t Buff[16] = {1, 1};
+
+ for (auto BuffPtr = &Buff[2]; BuffPtr != &Buff[16]; BuffPtr++) {
+ ASSERT_SUCCESS(olEnqueueHostCallback(
+ Queue,
+ [](void *BuffPtr) {
+ uint32_t *AsU32 = reinterpret_cast<uint32_t *>(BuffPtr);
+ AsU32[0] = AsU32[-1] + AsU32[-2];
+ },
+ BuffPtr));
+ }
+
+ ASSERT_SUCCESS(olSyncQueue(Queue));
+
+ for (uint32_t i = 2; i < 16; i++) {
+ ASSERT_EQ(Buff[i], Buff[i - 1] + Buff[i - 2]);
+ }
+}
+
+TEST_P(olEnqueueHostCallbackTest, InvalidNullCallback) {
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
+ olEnqueueHostCallback(Queue, nullptr, nullptr));
+}
+
+TEST_P(olEnqueueHostCallbackTest, InvalidNullQueue) {
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
+ olEnqueueHostCallback(nullptr, [](void *) {}, nullptr));
+}
>From 57d8254622b1eb69f11e0fb911c6be66821fe04e Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 7 Aug 2025 14:52:50 +0100
Subject: [PATCH 2/6] Add kernel blocking test
---
.../queue/olEnqueueHostCallback.cpp | 58 +++++++++++++++++++
1 file changed, 58 insertions(+)
diff --git a/offload/unittests/OffloadAPI/queue/olEnqueueHostCallback.cpp b/offload/unittests/OffloadAPI/queue/olEnqueueHostCallback.cpp
index 27dbfe2f111ce..17c8ecc322701 100644
--- a/offload/unittests/OffloadAPI/queue/olEnqueueHostCallback.cpp
+++ b/offload/unittests/OffloadAPI/queue/olEnqueueHostCallback.cpp
@@ -9,10 +9,14 @@
#include "../common/Fixtures.hpp"
#include <OffloadAPI.h>
#include <gtest/gtest.h>
+#include <thread>
struct olEnqueueHostCallbackTest : OffloadQueueTest {};
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olEnqueueHostCallbackTest);
+struct olEnqueueHostCallbackKernelTest : OffloadKernelTest {};
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olEnqueueHostCallbackKernelTest);
+
TEST_P(olEnqueueHostCallbackTest, Success) {
ASSERT_SUCCESS(olEnqueueHostCallback(Queue, [](void *) {}, nullptr));
}
@@ -37,6 +41,60 @@ TEST_P(olEnqueueHostCallbackTest, SuccessSequence) {
}
}
+TEST_P(olEnqueueHostCallbackKernelTest, SuccessBlocking) {
+ // Verify that a host kernel can block execution - A host task is created that
+ // only resolves when Block is set to false.
+ ol_kernel_launch_size_args_t LaunchArgs;
+ LaunchArgs.Dimensions = 1;
+ LaunchArgs.GroupSize = {64, 1, 1};
+ LaunchArgs.NumGroups = {1, 1, 1};
+ LaunchArgs.DynSharedMemory = 0;
+
+ ol_queue_handle_t Queue;
+ ASSERT_SUCCESS(olCreateQueue(Device, &Queue));
+
+ void *Mem;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
+ LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
+
+ uint32_t *Data = (uint32_t *)Mem;
+ for (uint32_t i = 0; i < 64; i++) {
+ Data[i] = 0;
+ }
+
+ volatile bool Block = true;
+ ASSERT_SUCCESS(olEnqueueHostCallback(
+ Queue,
+ [](void *Ptr) {
+ volatile bool *Block =
+ reinterpret_cast<volatile bool *>(reinterpret_cast<bool *>(Ptr));
+
+ while (*Block)
+ std::this_thread::yield();
+ },
+ const_cast<bool *>(&Block)));
+
+ struct {
+ void *Mem;
+ } Args{Mem};
+ ASSERT_SUCCESS(
+ olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
+
+ std::this_thread::sleep_for(std::chrono::milliseconds(500));
+ for (uint32_t i = 0; i < 64; i++) {
+ ASSERT_EQ(Data[i], 0);
+ }
+
+ Block = false;
+ ASSERT_SUCCESS(olSyncQueue(Queue));
+
+ for (uint32_t i = 0; i < 64; i++) {
+ ASSERT_EQ(Data[i], i);
+ }
+
+ ASSERT_SUCCESS(olMemFree(Mem));
+}
+
TEST_P(olEnqueueHostCallbackTest, InvalidNullCallback) {
ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
olEnqueueHostCallback(Queue, nullptr, nullptr));
>From 9dd222b6c868c8cf33523cb46db568978b3dfb2a Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 7 Aug 2025 15:17:23 +0100
Subject: [PATCH 3/6] Rename to olLaunchHostFunction
---
offload/liboffload/API/Queue.td | 12 ++++----
offload/liboffload/src/OffloadImpl.cpp | 6 ++--
offload/unittests/OffloadAPI/CMakeLists.txt | 2 +-
...tCallback.cpp => olLaunchHostFunction.cpp} | 30 +++++++++----------
4 files changed, 25 insertions(+), 25 deletions(-)
rename offload/unittests/OffloadAPI/queue/{olEnqueueHostCallback.cpp => olLaunchHostFunction.cpp} (72%)
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index 5fd202bba8496..0e20e23999d5e 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -110,26 +110,26 @@ def : Function {
}
def : FptrTypedef {
- let name = "ol_queue_callback_cb_t";
- let desc = "Callback function for use by `olEnqueueHostCallback`.";
+ let name = "ol_host_function_cb_t";
+ let desc = "Host function for use by `olLaunchHostFunction`.";
let params = [
- Param<"void *", "UserData", "user specified data passed into `olEnqueueHostCallback`.", PARAM_IN>,
+ Param<"void *", "UserData", "user specified data passed into `olLaunchHostFunction`.", PARAM_IN>,
];
let return = "void";
}
def : Function {
- let name = "olEnqueueHostCallback";
+ let name = "olLaunchHostFunction";
let desc = "Enqueue a callback function on the host.";
let details = [
- "The provided function will be called from the same process as the one that called `olEnqueueHostCallback`.",
+ "The provided function will be called from the same process as the one that called `olLaunchHostFunction`.",
"The callback will not run until all previous work submitted to the queue has completed.",
"The callback must return before any work submitted to the queue after it is started.",
"The callback must not call any liboffload API functions or any backend specific functions (such as Cuda or HSA library functions).",
];
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
- Param<"ol_queue_callback_cb_t", "Callback", "the callback function to call on the host", PARAM_IN>,
+ Param<"ol_host_function_cb_t", "Callback", "the callback function to call on the host", PARAM_IN>,
Param<"void *", "UserData", "a pointer that will be passed verbatim to the callback function", PARAM_IN_OPTIONAL>,
];
let returns = [];
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 0b72f51730091..5eee63ea60218 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -833,9 +833,9 @@ Error olGetSymbolInfoSize_impl(ol_symbol_handle_t Symbol,
return olGetSymbolInfoImplDetail(Symbol, PropName, 0, nullptr, PropSizeRet);
}
-Error olEnqueueHostCallback_impl(ol_queue_handle_t Queue,
- ol_queue_callback_cb_t Callback,
- void *UserData) {
+Error olLaunchHostFunction_impl(ol_queue_handle_t Queue,
+ ol_host_function_cb_t Callback,
+ void *UserData) {
return Queue->Device->Device->enqueueHostCallback(Callback, UserData,
Queue->AsyncInfo);
}
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index f3f1b4db2656a..b25db7022e9d7 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -42,7 +42,7 @@ add_offload_unittest("queue"
queue/olGetQueueInfo.cpp
queue/olGetQueueInfoSize.cpp
queue/olWaitEvents.cpp
- queue/olEnqueueHostCallback.cpp)
+ queue/olLaunchHostFunction.cpp)
add_offload_unittest("symbol"
symbol/olGetSymbol.cpp
diff --git a/offload/unittests/OffloadAPI/queue/olEnqueueHostCallback.cpp b/offload/unittests/OffloadAPI/queue/olLaunchHostFunction.cpp
similarity index 72%
rename from offload/unittests/OffloadAPI/queue/olEnqueueHostCallback.cpp
rename to offload/unittests/OffloadAPI/queue/olLaunchHostFunction.cpp
index 17c8ecc322701..bbcfe01784096 100644
--- a/offload/unittests/OffloadAPI/queue/olEnqueueHostCallback.cpp
+++ b/offload/unittests/OffloadAPI/queue/olLaunchHostFunction.cpp
@@ -1,4 +1,4 @@
-//===------- Offload API tests - olEnqueueHostCallback --------------------===//
+//===------- Offload API tests - olLaunchHostFunction ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,21 +11,21 @@
#include <gtest/gtest.h>
#include <thread>
-struct olEnqueueHostCallbackTest : OffloadQueueTest {};
-OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olEnqueueHostCallbackTest);
+struct olLaunchHostFunctionTest : OffloadQueueTest {};
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchHostFunctionTest);
-struct olEnqueueHostCallbackKernelTest : OffloadKernelTest {};
-OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olEnqueueHostCallbackKernelTest);
+struct olLaunchHostFunctionKernelTest : OffloadKernelTest {};
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchHostFunctionKernelTest);
-TEST_P(olEnqueueHostCallbackTest, Success) {
- ASSERT_SUCCESS(olEnqueueHostCallback(Queue, [](void *) {}, nullptr));
+TEST_P(olLaunchHostFunctionTest, Success) {
+ ASSERT_SUCCESS(olLaunchHostFunction(Queue, [](void *) {}, nullptr));
}
-TEST_P(olEnqueueHostCallbackTest, SuccessSequence) {
+TEST_P(olLaunchHostFunctionTest, SuccessSequence) {
uint32_t Buff[16] = {1, 1};
for (auto BuffPtr = &Buff[2]; BuffPtr != &Buff[16]; BuffPtr++) {
- ASSERT_SUCCESS(olEnqueueHostCallback(
+ ASSERT_SUCCESS(olLaunchHostFunction(
Queue,
[](void *BuffPtr) {
uint32_t *AsU32 = reinterpret_cast<uint32_t *>(BuffPtr);
@@ -41,7 +41,7 @@ TEST_P(olEnqueueHostCallbackTest, SuccessSequence) {
}
}
-TEST_P(olEnqueueHostCallbackKernelTest, SuccessBlocking) {
+TEST_P(olLaunchHostFunctionKernelTest, SuccessBlocking) {
// Verify that a host kernel can block execution - A host task is created that
// only resolves when Block is set to false.
ol_kernel_launch_size_args_t LaunchArgs;
@@ -63,7 +63,7 @@ TEST_P(olEnqueueHostCallbackKernelTest, SuccessBlocking) {
}
volatile bool Block = true;
- ASSERT_SUCCESS(olEnqueueHostCallback(
+ ASSERT_SUCCESS(olLaunchHostFunction(
Queue,
[](void *Ptr) {
volatile bool *Block =
@@ -95,12 +95,12 @@ TEST_P(olEnqueueHostCallbackKernelTest, SuccessBlocking) {
ASSERT_SUCCESS(olMemFree(Mem));
}
-TEST_P(olEnqueueHostCallbackTest, InvalidNullCallback) {
+TEST_P(olLaunchHostFunctionTest, InvalidNullCallback) {
ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
- olEnqueueHostCallback(Queue, nullptr, nullptr));
+ olLaunchHostFunction(Queue, nullptr, nullptr));
}
-TEST_P(olEnqueueHostCallbackTest, InvalidNullQueue) {
+TEST_P(olLaunchHostFunctionTest, InvalidNullQueue) {
ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
- olEnqueueHostCallback(nullptr, [](void *) {}, nullptr));
+ olLaunchHostFunction(nullptr, [](void *) {}, nullptr));
}
>From 4679b16439e6402dd6cba4bbb7f5eaadbd0b9b2b Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 7 Aug 2025 15:40:49 +0100
Subject: [PATCH 4/6] Delete queue
---
offload/unittests/OffloadAPI/queue/olLaunchHostFunction.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/offload/unittests/OffloadAPI/queue/olLaunchHostFunction.cpp b/offload/unittests/OffloadAPI/queue/olLaunchHostFunction.cpp
index bbcfe01784096..aa86750f6adf9 100644
--- a/offload/unittests/OffloadAPI/queue/olLaunchHostFunction.cpp
+++ b/offload/unittests/OffloadAPI/queue/olLaunchHostFunction.cpp
@@ -92,6 +92,7 @@ TEST_P(olLaunchHostFunctionKernelTest, SuccessBlocking) {
ASSERT_EQ(Data[i], i);
}
+ ASSERT_SUCCESS(olDestroyQueue(Queue));
ASSERT_SUCCESS(olMemFree(Mem));
}
>From 9024ac82fe90c903574396a9c9dee469eedf0b34 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Fri, 8 Aug 2025 09:46:57 +0100
Subject: [PATCH 5/6] Clean up docs and rename enqueueHostCallback
---
offload/liboffload/src/OffloadImpl.cpp | 4 ++--
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 4 ++--
.../plugins-nextgen/common/include/PluginInterface.h | 11 +++++------
.../plugins-nextgen/common/src/PluginInterface.cpp | 7 +++----
offload/plugins-nextgen/cuda/src/rtl.cpp | 4 ++--
offload/plugins-nextgen/host/src/rtl.cpp | 4 ++--
6 files changed, 16 insertions(+), 18 deletions(-)
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 5eee63ea60218..ccabf5fc0e799 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -836,8 +836,8 @@ Error olGetSymbolInfoSize_impl(ol_symbol_handle_t Symbol,
Error olLaunchHostFunction_impl(ol_queue_handle_t Queue,
ol_host_function_cb_t Callback,
void *UserData) {
- return Queue->Device->Device->enqueueHostCallback(Callback, UserData,
- Queue->AsyncInfo);
+ return Queue->Device->Device->enqueueHostCall(Callback, UserData,
+ Queue->AsyncInfo);
}
} // namespace offload
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index c2f583dab51ce..dbd69998616e4 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2592,8 +2592,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Plugin::success();
}
- Error enqueueHostCallbackImpl(void (*Callback)(void *), void *UserData,
- AsyncInfoWrapperTy &AsyncInfo) override {
+ Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
+ AsyncInfoWrapperTy &AsyncInfo) override {
AMDGPUStreamTy *Stream = nullptr;
if (auto Err = getStream(AsyncInfo, Stream))
return Err;
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 28202ac3ce100..5e32a1a76d966 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -965,12 +965,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Error initDeviceInfo(__tgt_device_info *DeviceInfo);
virtual Error initDeviceInfoImpl(__tgt_device_info *DeviceInfo) = 0;
- /// Enqueue a host call to AsyncInfo
- Error enqueueHostCallback(void (*Callback)(void *), void *UserData,
- __tgt_async_info *AsyncInfo);
- virtual Error enqueueHostCallbackImpl(void (*Callback)(void *),
- void *UserData,
- AsyncInfoWrapperTy &AsyncInfo) = 0;
+ /// Enqueue a host call to AsyncInfo
+ Error enqueueHostCall(void (*Callback)(void *), void *UserData,
+ __tgt_async_info *AsyncInfo);
+ virtual Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
+ AsyncInfoWrapperTy &AsyncInfo) = 0;
/// Create an event.
Error createEvent(void **EventPtrStorage);
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 5c088a097a2a5..f177c5bc9f487 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1589,12 +1589,11 @@ Error GenericDeviceTy::initAsyncInfo(__tgt_async_info **AsyncInfoPtr) {
return Err;
}
-Error GenericDeviceTy::enqueueHostCallback(void (*Callback)(void *),
- void *UserData,
- __tgt_async_info *AsyncInfo) {
+Error GenericDeviceTy::enqueueHostCall(void (*Callback)(void *), void *UserData,
+ __tgt_async_info *AsyncInfo) {
AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
- auto Err = enqueueHostCallbackImpl(Callback, UserData, AsyncInfoWrapper);
+ auto Err = enqueueHostCallImpl(Callback, UserData, AsyncInfoWrapper);
AsyncInfoWrapper.finalize(Err);
return Err;
}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index c2dca1386a548..5e1843c045534 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -873,8 +873,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::success();
}
- Error enqueueHostCallbackImpl(void (*Callback)(void *), void *UserData,
- AsyncInfoWrapperTy &AsyncInfo) override {
+ Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
+ AsyncInfoWrapperTy &AsyncInfo) override {
if (auto Err = setContext())
return Err;
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 94d191e5fcd71..f8ddc6713c011 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -320,8 +320,8 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
"initDeviceInfoImpl not supported");
}
- Error enqueueHostCallbackImpl(void (*Callback)(void *), void *UserData,
- AsyncInfoWrapperTy &AsyncInfo) override {
+ Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
+ AsyncInfoWrapperTy &AsyncInfo) override {
Callback(UserData);
return Plugin::success();
};
>From e5736269ecbee2cf3b4bfa961f8186af4d5c529e Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Thu, 14 Aug 2025 13:31:47 +0100
Subject: [PATCH 6/6] Respond to feedback
---
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index dbd69998616e4..536c662451dfd 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1067,9 +1067,9 @@ struct AMDGPUStreamTy {
static void CallbackWrapper(AMDGPUSignalTy *InputSignal,
AMDGPUSignalTy *OutputSignal,
void (*Callback)(void *), void *UserData) {
+ // The wait call will not error in this context.
if (InputSignal)
if (auto Err = InputSignal->wait())
- // Wait shouldn't report an error
reportFatalInternalError(std::move(Err));
Callback(UserData);
More information about the llvm-commits
mailing list