[llvm] [Offload] Don't create events for empty queues (PR #152304)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 6 06:03:14 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Ross Brunton (RossBrunton)
<details>
<summary>Changes</summary>
Add a device-level query to check whether a queue has any pending work. If
liboffload is asked to create an event for an empty queue, it now creates an
"empty" event that is already complete.
This allows `olCreateEvent`, `olSyncEvent`, and `olWaitEvents` to return
quickly for empty queues.
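For reference, a minimal caller-side sketch of the fast path this enables. The
entry-point names come from this patch; the header name and the `OL_SUCCESS`
check are assumptions, so treat this as illustrative rather than authoritative.

```cpp
#include <OffloadAPI.h> // assumed header name for liboffload's public API

// Sketch: synchronizing against a queue that has no outstanding work.
void syncIdleQueue(ol_queue_handle_t Queue) {
  ol_event_handle_t Event = nullptr;
  // With this patch, creating an event on an empty queue yields an
  // "always complete" event (EventInfo == nullptr) with no device call.
  if (olCreateEvent(Queue, &Event) != OL_SUCCESS)
    return;
  // Both calls short-circuit for the always-complete event.
  olSyncEvent(Event);
  olDestroyEvent(Event);
}
```

Before this change, `olCreateEvent` always recorded a plugin event on the
queue, so even a no-op synchronization went through the device plugin.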
---
Full diff: https://github.com/llvm/llvm-project/pull/152304.diff
6 Files Affected:
- (modified) offload/liboffload/src/OffloadImpl.cpp (+20-4)
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+11)
- (modified) offload/plugins-nextgen/common/include/PluginInterface.h (+8)
- (modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (+15)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+5)
- (modified) offload/plugins-nextgen/host/src/rtl.cpp (+3)
``````````diff
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 6486b2b6d13a6..272a12ab59a06 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -72,6 +72,8 @@ struct ol_queue_impl_t {
struct ol_event_impl_t {
ol_event_impl_t(void *EventInfo, ol_queue_handle_t Queue)
: EventInfo(EventInfo), Queue(Queue) {}
+ // EventInfo may be null, in which case the event should be considered always
+ // complete
void *EventInfo;
ol_queue_handle_t Queue;
};
@@ -509,8 +511,8 @@ Error olWaitEvents_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events,
return Plugin::error(ErrorCode::INVALID_NULL_HANDLE,
"olWaitEvents asked to wait on a NULL event");
- // Do nothing if the event is for this queue
- if (Event->Queue == Queue)
+ // Do nothing if the event is for this queue or the event is always complete
+ if (Event->Queue == Queue || !Event->EventInfo)
continue;
if (auto Err = Device->waitEvent(Event->EventInfo, Queue->AsyncInfo))
@@ -548,6 +550,10 @@ Error olGetQueueInfoSize_impl(ol_queue_handle_t Queue, ol_queue_info_t PropName,
}
Error olSyncEvent_impl(ol_event_handle_t Event) {
+ if (!Event->EventInfo)
+ // Event always complete
+ return Plugin::success();
+
if (auto Res = Event->Queue->Device->Device->syncEvent(Event->EventInfo))
return Res;
@@ -555,8 +561,9 @@ Error olSyncEvent_impl(ol_event_handle_t Event) {
}
Error olDestroyEvent_impl(ol_event_handle_t Event) {
- if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo))
- return Res;
+ if (Event->EventInfo)
+ if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo))
+ return Res;
return olDestroy(Event);
}
@@ -590,7 +597,16 @@ Error olGetEventInfoSize_impl(ol_event_handle_t Event, ol_event_info_t PropName,
}
Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) {
+ auto Pending = Queue->Device->Device->hasPendingWork(Queue->AsyncInfo);
+ if (auto Err = Pending.takeError())
+ return Err;
+
*EventOut = new ol_event_impl_t(nullptr, Queue);
+ if (!*Pending)
+ // Queue is empty, don't record an event and consider the event always
+ // complete
+ return Plugin::success();
+
if (auto Res = Queue->Device->Device->createEvent(&(*EventOut)->EventInfo))
return Res;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index b7bfa89fc9ea6..833c56df21ac3 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2591,6 +2591,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Event->wait(*Stream);
}
+ Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
+ auto Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>();
+ if (!Stream)
+ return false;
+
+ auto Query = Stream->query();
+ if (Query)
+ return !*Query;
+ return Query.takeError();
+ }
+
/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr);
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 8c17a2ee07047..99e7a13ff2b37 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -939,6 +939,14 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Error printInfo();
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
+ /// Return true if the device has work that is either queued or currently
+ /// running
+ ///
+ /// Devices which cannot report this information should always return true
+ Expected<bool> hasPendingWork(__tgt_async_info *AsyncInfo);
+ virtual Expected<bool>
+ hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;
+
/// Getters of the grid values.
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }
uint32_t getThreadLimit() const { return GridValues.GV_Max_WG_Size; }
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 94a050b559efe..cce2405cd51c4 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1623,6 +1623,21 @@ Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
return Err;
}
+Expected<bool> GenericDeviceTy::hasPendingWork(__tgt_async_info *AsyncInfo) {
+ AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
+ auto Res = hasPendingWorkImpl(AsyncInfoWrapper);
+ if (auto Err = Res.takeError()) {
+ AsyncInfoWrapper.finalize(Err);
+ return Err;
+ }
+
+ auto Err = Plugin::success();
+ AsyncInfoWrapper.finalize(Err);
+ if (Err)
+ return Err;
+ return Res;
+}
+
Error GenericDeviceTy::syncEvent(void *EventPtr) {
return syncEventImpl(EventPtr);
}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index c5f31670079ae..7649fd9285bb5 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -916,6 +916,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::check(Res, "error in cuStreamWaitEvent: %s");
}
+ // TODO: This should be implementable on CUDA
+ Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
+ return true;
+ }
+
/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
CUevent Event = reinterpret_cast<CUevent>(EventPtr);
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index d950572265b4c..9abc3507f6e68 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -333,6 +333,9 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
return Plugin::success();
}
+ Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
+ return true;
+ }
Error syncEventImpl(void *EventPtr) override { return Plugin::success(); }
/// Print information about the device.
``````````
</details>
https://github.com/llvm/llvm-project/pull/152304