[llvm] [Offload] Implement event sync in amdgpu (PR #149300)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 17 05:47:52 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Ross Brunton (RossBrunton)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/149300.diff
3 Files Affected:
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+50-2)
- (modified) offload/unittests/OffloadAPI/common/Fixtures.hpp (-3)
- (modified) offload/unittests/OffloadAPI/event/olWaitEvent.cpp (-3)
``````````diff
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 12c7cc62905c9..b2fd950c9d500 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1115,6 +1115,18 @@ struct AMDGPUStreamTy {
return Plugin::success();
}
+ /// Complete pending post actions until and including the event in target
+ /// slot.
+ Error completeUntil(uint32_t TargetSlot) {
+ for (uint32_t Slot = 0; Slot <= TargetSlot; ++Slot) {
+ // Take the post action of the operation if any.
+ if (auto Err = Slots[Slot].performAction())
+ return Err;
+ }
+
+ return Plugin::success();
+ }
+
/// Make the current stream wait on a specific operation of another stream.
/// The idea is to make the current stream waiting on two signals: 1) the last
/// signal of the current stream, and 2) the last signal of the other stream.
@@ -1502,6 +1514,11 @@ struct AMDGPUStreamTy {
return complete();
}
+ /// Synchronize the stream until the given event. The current thread waits
+ /// until the provided event is finalized, and it performs the pending post
+ /// actions for that and prior events.
+ Error synchronizeOn(AMDGPUEventTy &Event);
+
/// Query the stream and complete pending post actions if operations finished.
/// Return whether all the operations completed. This operation does not block
/// the calling thread.
@@ -1575,6 +1592,21 @@ struct AMDGPUEventTy {
return Stream.waitEvent(*this);
}
+ Error sync() {
+ std::lock_guard<std::mutex> Lock(Mutex);
+
+ if (!RecordedStream)
+ return Plugin::error(ErrorCode::INVALID_ARGUMENT,
+ "event does not have any recorded stream");
+
+ // No need to wait on anything, the recorded stream already finished the
+ // corresponding operation.
+ if (RecordedSlot < 0)
+ return Plugin::success();
+
+ return RecordedStream->synchronizeOn(*this);
+ }
+
protected:
/// The stream registered in this event.
AMDGPUStreamTy *RecordedStream;
@@ -1630,6 +1662,22 @@ Error AMDGPUStreamTy::waitEvent(const AMDGPUEventTy &Event) {
return waitOnStreamOperation(RecordedStream, Event.RecordedSlot);
}
+Error AMDGPUStreamTy::synchronizeOn(AMDGPUEventTy &Event) {
+ std::lock_guard<std::mutex> Lock(Mutex);
+
+ // Wait until the requested slot has completed
+ if (auto Err = Slots[Event.RecordedSlot].Signal->wait(
+ StreamBusyWaitMicroseconds, &Device))
+ return Err;
+
+ // If the event is the last one in the stream, just do a full finalize
+ if (Event.RecordedSlot == last())
+ return complete();
+
+ // Otherwise, only finalize until the appropriate event
+ return completeUntil(Event.RecordedSlot);
+}
+
struct AMDGPUStreamManagerTy final
: GenericDeviceResourceManagerTy<AMDGPUResourceRef<AMDGPUStreamTy>> {
using ResourceRef = AMDGPUResourceRef<AMDGPUStreamTy>;
@@ -2540,8 +2588,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
- return Plugin::error(ErrorCode::UNIMPLEMENTED,
- "synchronize event not implemented");
+ AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr);
+ return Event->sync();
}
/// Print information about the device.
diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp
index e5d815ecda965..546921164f691 100644
--- a/offload/unittests/OffloadAPI/common/Fixtures.hpp
+++ b/offload/unittests/OffloadAPI/common/Fixtures.hpp
@@ -171,9 +171,6 @@ struct OffloadQueueTest : OffloadDeviceTest {
struct OffloadEventTest : OffloadQueueTest {
void SetUp() override {
RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
- if (getPlatformBackend() == OL_PLATFORM_BACKEND_AMDGPU)
- GTEST_SKIP() << "AMDGPU synchronize event not implemented";
-
// Get an event from a memcpy. We can still use it in olGetEventInfo etc
// after it has been waited on.
void *Alloc;
diff --git a/offload/unittests/OffloadAPI/event/olWaitEvent.cpp b/offload/unittests/OffloadAPI/event/olWaitEvent.cpp
index 05356d4ef8d75..f80dabb4fc93f 100644
--- a/offload/unittests/OffloadAPI/event/olWaitEvent.cpp
+++ b/offload/unittests/OffloadAPI/event/olWaitEvent.cpp
@@ -14,9 +14,6 @@ using olWaitEventTest = OffloadQueueTest;
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olWaitEventTest);
TEST_P(olWaitEventTest, Success) {
- if (getPlatformBackend() == OL_PLATFORM_BACKEND_AMDGPU)
- GTEST_SKIP() << "AMDGPU synchronize event not implemented";
-
uint32_t Src = 42;
void *DstPtr;
``````````
</details>
https://github.com/llvm/llvm-project/pull/149300
More information about the llvm-commits
mailing list