[llvm] [Offload] Use `amd_signal_async_handler` for host function calls (PR #154131)
Ross Brunton via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 12 10:24:29 PDT 2025
https://github.com/RossBrunton updated https://github.com/llvm/llvm-project/pull/154131
>From d9e6a6f9cfc8d2c329aae27eb41018c16b73f241 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Mon, 18 Aug 2025 16:06:52 +0100
Subject: [PATCH 1/3] [Offload] Use `amd_signal_async_handler` for host
function calls
---
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 44 +++++++++++++---------
1 file changed, 27 insertions(+), 17 deletions(-)
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index c26cfe961aa0e..80e823e7ee833 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1079,18 +1079,19 @@ struct AMDGPUStreamTy {
/// Indicate to spread data transfers across all available SDMAs
bool UseMultipleSdmaEngines;
+ struct CallbackDataType {
+ void (*UserFn)(void *);
+ void *UserData;
+ AMDGPUSignalTy *OutputSignal;
+ };
/// Wrapper function for implementing host callbacks
- static void CallbackWrapper(AMDGPUSignalTy *InputSignal,
- AMDGPUSignalTy *OutputSignal,
- void (*Callback)(void *), void *UserData) {
- // The wait call will not error in this context.
- if (InputSignal)
- if (auto Err = InputSignal->wait())
- reportFatalInternalError(std::move(Err));
-
- Callback(UserData);
-
- OutputSignal->signal();
+ static bool callbackWrapper([[maybe_unused]] hsa_signal_value_t Signal,
+ void *UserData) {
+ auto CallbackData = reinterpret_cast<CallbackDataType *>(UserData);
+ CallbackData->UserFn(CallbackData->UserData);
+ CallbackData->OutputSignal->signal();
+ delete CallbackData;
+ return false;
}
/// Return the current number of asynchronous operations on the stream.
@@ -1551,13 +1552,22 @@ struct AMDGPUStreamTy {
InputSignal = consume(OutputSignal).second;
}
- // "Leaking" the thread here is consistent with other work added to the
- // queue. The input and output signals will remain valid until the output is
- // signaled.
- std::thread(CallbackWrapper, InputSignal, OutputSignal, Callback, UserData)
- .detach();
+ auto *CallbackData = new CallbackDataType{Callback, UserData, OutputSignal};
+ if (InputSignal && InputSignal->load()) {
+ hsa_status_t Status = hsa_amd_signal_async_handler(
+ InputSignal->get(), HSA_SIGNAL_CONDITION_EQ, 0, callbackWrapper,
+ CallbackData);
- return Plugin::success();
+ return Plugin::check(Status, "error in hsa_amd_signal_async_handler: %s");
+ } else {
+ // No dependencies - schedule it now.
+ // Using a seperate thread because this function should run asynchronously
+ // and not block the main thread.
+ std::thread([](void *CallbackData) { callbackWrapper(0, CallbackData); },
+ CallbackData)
+ .detach();
+ return Plugin::success();
+ }
}
/// Synchronize with the stream. The current thread waits until all operations
>From bf09408825ccb1403bd0912d2bc8cc7b5d2d833f Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Tue, 19 Aug 2025 10:05:50 +0100
Subject: [PATCH 2/3] Use function pointer type for callback function
---
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 80e823e7ee833..6c0dd403719ee 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -918,6 +918,10 @@ struct AMDGPUQueueTy {
/// devices. This class relies on signals to implement streams and define the
/// dependencies between asynchronous operations.
struct AMDGPUStreamTy {
+public:
+ /// Function pointer type for `pushHostCallback`
+ using HostFnType = void (*)(void *);
+
private:
/// Utility struct holding arguments for async H2H memory copies.
struct MemcpyArgsTy {
@@ -1080,7 +1084,7 @@ struct AMDGPUStreamTy {
bool UseMultipleSdmaEngines;
struct CallbackDataType {
- void (*UserFn)(void *);
+ HostFnType UserFn;
void *UserData;
AMDGPUSignalTy *OutputSignal;
};
@@ -1536,7 +1540,7 @@ struct AMDGPUStreamTy {
OutputSignal->get());
}
- Error pushHostCallback(void (*Callback)(void *), void *UserData) {
+ Error pushHostCallback(HostFnType Callback, void *UserData) {
// Retrieve an available signal for the operation's output.
AMDGPUSignalTy *OutputSignal = nullptr;
if (auto Err = SignalManager.getResource(OutputSignal))
@@ -2722,7 +2726,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Plugin::success();
}
- Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
+ Error enqueueHostCallImpl(AMDGPUStreamTy::HostFnType Callback, void *UserData,
AsyncInfoWrapperTy &AsyncInfo) override {
AMDGPUStreamTy *Stream = nullptr;
if (auto Err = getStream(AsyncInfo, Stream))
>From fa920cd87f90f63b4ca16586730666bbd741164f Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Mon, 1 Sep 2025 15:27:09 +0100
Subject: [PATCH 3/3] Style
---
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 6c0dd403719ee..e1d3d75695163 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1563,15 +1563,15 @@ struct AMDGPUStreamTy {
CallbackData);
return Plugin::check(Status, "error in hsa_amd_signal_async_handler: %s");
- } else {
- // No dependencies - schedule it now.
- // Using a seperate thread because this function should run asynchronously
- // and not block the main thread.
- std::thread([](void *CallbackData) { callbackWrapper(0, CallbackData); },
- CallbackData)
- .detach();
- return Plugin::success();
}
+
+ // No dependencies - schedule it now.
+ // Using a separate thread because this function should run asynchronously
+ // and not block the main thread.
+ std::thread([](void *CallbackData) { callbackWrapper(0, CallbackData); },
+ CallbackData)
+ .detach();
+ return Plugin::success();
}
/// Synchronize with the stream. The current thread waits until all operations
More information about the llvm-commits
mailing list