[llvm] [Offload] Don't create events for empty queues (PR #152304)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 6 06:03:14 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Ross Brunton (RossBrunton)
<details>
<summary>Changes</summary>
Add a device-level query to check whether a queue has any pending work. If
liboffload is asked to create an event for an empty queue, it now creates an
"empty" event that is already complete.
This allows `olCreateEvent`, `olSyncEvent`, and `olWaitEvents` to return
quickly for empty queues.
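For reference, a minimal caller-side sketch of the fast path this enables. The
entry-point names come from this patch; the header name and the `OL_SUCCESS`
check are assumptions, so treat this as illustrative rather than authoritative.

```cpp
#include <OffloadAPI.h> // assumed header name for liboffload's public API

// Sketch: synchronizing against a queue that has no outstanding work.
void syncIdleQueue(ol_queue_handle_t Queue) {
  ol_event_handle_t Event = nullptr;
  // With this patch, creating an event on an empty queue yields an
  // "always complete" event (EventInfo == nullptr) with no device call.
  if (olCreateEvent(Queue, &Event) != OL_SUCCESS)
    return;
  // Both calls short-circuit for the always-complete event.
  olSyncEvent(Event);
  olDestroyEvent(Event);
}
```

Before this change, `olCreateEvent` always recorded a plugin event on the
queue, so even a no-op synchronization went through the device plugin.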
---
Full diff: https://github.com/llvm/llvm-project/pull/152304.diff
6 Files Affected:
- (modified) offload/liboffload/src/OffloadImpl.cpp (+20-4)
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+11)
- (modified) offload/plugins-nextgen/common/include/PluginInterface.h (+8)
- (modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (+15)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+5)
- (modified) offload/plugins-nextgen/host/src/rtl.cpp (+3)
``````````diff
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 6486b2b6d13a6..272a12ab59a06 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -72,6 +72,8 @@ struct ol_queue_impl_t {
struct ol_event_impl_t {
ol_event_impl_t(void *EventInfo, ol_queue_handle_t Queue)
: EventInfo(EventInfo), Queue(Queue) {}
+ // EventInfo may be null, in which case the event should be considered always
+ // complete
void *EventInfo;
ol_queue_handle_t Queue;
};
@@ -509,8 +511,8 @@ Error olWaitEvents_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events,
return Plugin::error(ErrorCode::INVALID_NULL_HANDLE,
"olWaitEvents asked to wait on a NULL event");
- // Do nothing if the event is for this queue
- if (Event->Queue == Queue)
+ // Do nothing if the event is for this queue or the event is always complete
+ if (Event->Queue == Queue || !Event->EventInfo)
continue;
if (auto Err = Device->waitEvent(Event->EventInfo, Queue->AsyncInfo))
@@ -548,6 +550,10 @@ Error olGetQueueInfoSize_impl(ol_queue_handle_t Queue, ol_queue_info_t PropName,
}
Error olSyncEvent_impl(ol_event_handle_t Event) {
+ if (!Event->EventInfo)
+ // Event always complete
+ return Plugin::success();
+
if (auto Res = Event->Queue->Device->Device->syncEvent(Event->EventInfo))
return Res;
@@ -555,8 +561,9 @@ Error olSyncEvent_impl(ol_event_handle_t Event) {
}
Error olDestroyEvent_impl(ol_event_handle_t Event) {
- if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo))
- return Res;
+ if (Event->EventInfo)
+ if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo))
+ return Res;
return olDestroy(Event);
}
@@ -590,7 +597,16 @@ Error olGetEventInfoSize_impl(ol_event_handle_t Event, ol_event_info_t PropName,
}
Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) {
+ auto Pending = Queue->Device->Device->hasPendingWork(Queue->AsyncInfo);
+ if (auto Err = Pending.takeError())
+ return Err;
+
*EventOut = new ol_event_impl_t(nullptr, Queue);
+ if (!*Pending)
+ // Queue is empty, don't record an event and consider the event always
+ // complete
+ return Plugin::success();
+
if (auto Res = Queue->Device->Device->createEvent(&(*EventOut)->EventInfo))
return Res;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index b7bfa89fc9ea6..833c56df21ac3 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2591,6 +2591,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Event->wait(*Stream);
}
+ Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
+ auto Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>();
+ if (!Stream)
+ return false;
+
+ auto Query = Stream->query();
+ if (Query)
+ return !*Query;
+ return Query.takeError();
+ }
+
/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr);
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 8c17a2ee07047..99e7a13ff2b37 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -939,6 +939,14 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Error printInfo();
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
+ /// Return true if the device has work that is either queued or currently
+ /// running
+ ///
+ /// Devices which cannot report this information should always return true
+ Expected<bool> hasPendingWork(__tgt_async_info *AsyncInfo);
+ virtual Expected<bool>
+ hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;
+
/// Getters of the grid values.
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }
uint32_t getThreadLimit() const { return GridValues.GV_Max_WG_Size; }
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 94a050b559efe..cce2405cd51c4 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1623,6 +1623,21 @@ Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
return Err;
}
+Expected<bool> GenericDeviceTy::hasPendingWork(__tgt_async_info *AsyncInfo) {
+ AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
+ auto Res = hasPendingWorkImpl(AsyncInfoWrapper);
+ if (auto Err = Res.takeError()) {
+ AsyncInfoWrapper.finalize(Err);
+ return Err;
+ }
+
+ auto Err = Plugin::success();
+ AsyncInfoWrapper.finalize(Err);
+ if (Err)
+ return Err;
+ return Res;
+}
+
Error GenericDeviceTy::syncEvent(void *EventPtr) {
return syncEventImpl(EventPtr);
}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index c5f31670079ae..7649fd9285bb5 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -916,6 +916,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::check(Res, "error in cuStreamWaitEvent: %s");
}
+ // TODO: This should be implementable on CUDA
+ Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
+ return true;
+ }
+
/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
CUevent Event = reinterpret_cast<CUevent>(EventPtr);
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index d950572265b4c..9abc3507f6e68 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -333,6 +333,9 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
return Plugin::success();
}
+ Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
+ return true;
+ }
Error syncEventImpl(void *EventPtr) override { return Plugin::success(); }
/// Print information about the device.
``````````
</details>
https://github.com/llvm/llvm-project/pull/152304