[llvm] [OFFLOAD] Add asynchronous queue query API for libomptarget migration (PR #172231)

Tue Jan 20 09:54:26 PST 2026

https://github.com/fineg74 updated https://github.com/llvm/llvm-project/pull/172231

>From a1080caff2f5cb9aced3b067ab2c25af5846c165 Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Fri, 12 Dec 2025 16:49:28 -0800
Subject: [PATCH 1/6] add QueryAsync API

---
 offload/liboffload/API/Queue.td        | 9 +++++++++
 offload/liboffload/src/OffloadImpl.cpp | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index ededa9cc92fef..75df8f103e9f2 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -125,3 +125,12 @@ def olLaunchHostFunction : Function {
   ];
   let returns = [];
 }
+
+def olQueryAsync : Function {
+    let desc = "Query for device/queue/event based completion on in a non-blocking manner.";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+    ];
+    let returns = [];
+}
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index eab9627217ca8..b46546414c602 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1214,5 +1214,14 @@ Error olLaunchHostFunction_impl(ol_queue_handle_t Queue,
                                                 Queue->AsyncInfo);
 }
 
+Error olQueryAsync_impl(ol_queue_handle_t Queue) {
+  if (Queue->AsyncInfo->Queue) {
+    if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo))
+      return Err;
+  }
+
+  return Error::success();
+}
+
 } // namespace offload
 } // namespace llvm

>From d40ea9ef40b8fc50619ef95e0c8dc2cc93a0d2d1 Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Thu, 15 Jan 2026 17:12:55 -0800
Subject: [PATCH 2/6] Add test and more API details

---
 offload/liboffload/API/Queue.td                    | 7 +++++--
 offload/liboffload/src/OffloadImpl.cpp             | 1 +
 offload/unittests/OffloadAPI/queue/olSyncQueue.cpp | 4 ++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index 75df8f103e9f2..fdc73a1b15388 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -127,8 +127,11 @@ def olLaunchHostFunction : Function {
 }
 
 def olQueryAsync : Function {
-    let desc = "Query for device/queue/event based completion on in a non-blocking manner.";
-    let details = [];
+    let desc = "Query for queue work completion on in a non-blocking manner.";
+    let details = [
+      "The function checks if a queue work has completed without blocking the calling thread.",
+      "If there work has completed the function would perform requred cleanup."
+    ];
     let params = [
         Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
     ];
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index b1fe1abed768a..94507dfcd6b20 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1236,6 +1236,7 @@ Error olQueryAsync_impl(ol_queue_handle_t Queue) {
     if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo))
       return Err;
   }
+  return Error::success();
 }
 
 } // namespace offload
diff --git a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
index f07ebbdbaed82..866b6b33ec657 100644
--- a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
+++ b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
@@ -16,3 +16,7 @@ OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olSyncQueueTest);
 TEST_P(olSyncQueueTest, SuccessEmptyQueue) {
   ASSERT_SUCCESS(olSyncQueue(Queue));
 }
+
+TEST_P(olSyncQueueTest, SuccessEmptyAsyncQueue) {
+  ASSERT_SUCCESS(olSyncQueue(Queue));
+}

>From 3d1f3b1dd0bfbd8430089208dc2d7045bc8ea6a4 Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Fri, 16 Jan 2026 12:41:05 -0800
Subject: [PATCH 3/6] Address PR comments

---
 offload/liboffload/API/Queue.td                    | 8 ++++----
 offload/liboffload/src/OffloadImpl.cpp             | 2 +-
 offload/unittests/OffloadAPI/queue/olSyncQueue.cpp | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index fdc73a1b15388..d6653debffa1f 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -126,11 +126,11 @@ def olLaunchHostFunction : Function {
   let returns = [];
 }
 
-def olQueryAsync : Function {
-    let desc = "Query for queue work completion on in a non-blocking manner.";
+def olQueryQueue : Function {
+    let desc = "Query for queue work completion in a non-blocking manner.";
     let details = [
-      "The function checks if a queue work has completed without blocking the calling thread.",
-      "If there work has completed the function would perform requred cleanup."
+      "The function checks if a queue work has completed enqueued work without blocking the calling thread.",
+      "If enqueued work has completed the function would perform internal queue cleanup."
     ];
     let params = [
         Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 94507dfcd6b20..9e0691252cd89 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1231,7 +1231,7 @@ Error olMemUnregister_impl(ol_device_handle_t Device, void *Ptr) {
   return Device->Device->dataUnlock(Ptr);
 }
 
-Error olQueryAsync_impl(ol_queue_handle_t Queue) {
+Error olQueryQueue_impl(ol_queue_handle_t Queue) {
   if (Queue->AsyncInfo->Queue) {
     if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo))
       return Err;
diff --git a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
index 866b6b33ec657..2e130af77984c 100644
--- a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
+++ b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
@@ -18,5 +18,5 @@ TEST_P(olSyncQueueTest, SuccessEmptyQueue) {
 }
 
 TEST_P(olSyncQueueTest, SuccessEmptyAsyncQueue) {
-  ASSERT_SUCCESS(olSyncQueue(Queue));
+  ASSERT_SUCCESS(olQueryQueue(Queue));
 }

>From 3046de466fe10f40187efd65d27ed620053ef7dd Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Mon, 19 Jan 2026 23:06:26 -0800
Subject: [PATCH 4/6] Address PR comments

---
 offload/liboffload/API/Queue.td               |  3 ++-
 offload/liboffload/src/OffloadImpl.cpp        |  4 ++--
 offload/plugins-nextgen/amdgpu/src/rtl.cpp    | 15 +++++++++---
 .../common/include/PluginInterface.h          |  7 ++++--
 .../common/src/PluginInterface.cpp            |  6 +++--
 offload/plugins-nextgen/cuda/src/rtl.cpp      | 18 ++++++++++----
 offload/plugins-nextgen/host/src/rtl.cpp      |  6 ++++-
 .../level_zero/include/L0Device.h             |  4 +++-
 .../level_zero/src/L0Device.cpp               | 19 +++++++++++----
 offload/test/CMakeLists.txt                   |  1 -
 offload/unittests/OffloadAPI/CMakeLists.txt   |  3 ++-
 .../OffloadAPI/queue/olQueryQueue.cpp         | 24 +++++++++++++++++++
 .../OffloadAPI/queue/olSyncQueue.cpp          |  4 ----
 13 files changed, 87 insertions(+), 27 deletions(-)
 create mode 100644 offload/unittests/OffloadAPI/queue/olQueryQueue.cpp

diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index d6653debffa1f..daf1e5f2762a2 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -133,7 +133,8 @@ def olQueryQueue : Function {
       "If enqueued work has completed the function would perform internal queue cleanup."
     ];
     let params = [
-        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+        Param<"bool *", "IsQueueWorkCompleted", " A flag indicating if a queue work has completed", PARAM_OUT_OPTIONAL>
     ];
     let returns = [];
 }
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 9e0691252cd89..1906c7a2e941e 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1231,9 +1231,9 @@ Error olMemUnregister_impl(ol_device_handle_t Device, void *Ptr) {
   return Device->Device->dataUnlock(Ptr);
 }
 
-Error olQueryQueue_impl(ol_queue_handle_t Queue) {
+Error olQueryQueue_impl(ol_queue_handle_t Queue, bool *IsQueueWorkCompleted) {
   if (Queue->AsyncInfo->Queue) {
-    if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo))
+    if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo, false, IsQueueWorkCompleted))
       return Err;
   }
   return Error::success();
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 008fef6617a3f..7e4a7ce26921c 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2430,7 +2430,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
   }
 
   /// Query for the completion of the pending operations on the async info.
-  Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override {
+  Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
+                       bool ReleaseQueue,
+                       bool *IsQueueWorkCompleted) override {
+    if (IsQueueWorkCompleted)
+      *IsQueueWorkCompleted = false;
     AMDGPUStreamTy *Stream =
         reinterpret_cast<AMDGPUStreamTy *>(AsyncInfo.Queue);
     assert(Stream && "Invalid stream");
@@ -2443,11 +2447,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     if (!(*CompletedOrErr))
       return Plugin::success();
 
+    if (IsQueueWorkCompleted)
+      *IsQueueWorkCompleted = true;
     // Once the stream is completed, return it to stream pool and reset
     // AsyncInfo. This is to make sure the synchronization only works for its
     // own tasks.
-    AsyncInfo.Queue = nullptr;
-    return AMDGPUStreamManager.returnResource(Stream);
+    if (ReleaseQueue) {
+      AsyncInfo.Queue = nullptr;
+      return AMDGPUStreamManager.returnResource(Stream);
+    }
+    return Plugin::success();
   }
 
   /// Pin the host buffer and return the device pointer that should be used for
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index caf86a921a914..be58b611d2c82 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -854,8 +854,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
 
   /// Query for the completion of the pending operations on the __tgt_async_info
   /// structure in a non-blocking manner.
-  Error queryAsync(__tgt_async_info *AsyncInfo);
-  virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo) = 0;
+  Error queryAsync(__tgt_async_info *AsyncInfo, bool ReleaseQueue = true,
+                   bool *IsQueueWorkCompleted = nullptr);
+  virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
+                               bool ReleaseQueue,
+                               bool *IsQueueWorkCompleted) = 0;
 
   /// Check whether the architecture supports VA management
   virtual bool supportVAManagement() const { return false; }
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 4ec836636b57d..2309298c125c2 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1198,12 +1198,14 @@ Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo,
   return Plugin::success();
 }
 
-Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo) {
+Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo,
+                                  bool ReleaseQueue,
+                                  bool *IsQueueWorkCompleted) {
   if (!AsyncInfo || !AsyncInfo->Queue)
     return Plugin::error(ErrorCode::INVALID_ARGUMENT,
                          "invalid async info queue");
 
-  return queryAsyncImpl(*AsyncInfo);
+  return queryAsyncImpl(*AsyncInfo, ReleaseQueue, IsQueueWorkCompleted);
 }
 
 Error GenericDeviceTy::memoryVAMap(void **Addr, void *VAddr, size_t *RSize) {
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 568f797058b46..318c9f970435b 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -792,20 +792,28 @@ struct CUDADeviceTy : public GenericDeviceTy {
   }
 
   /// Query for the completion of the pending operations on the async info.
-  Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override {
+  Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
+                       bool ReleaseQueue,
+                       bool *IsQueueWorkCompleted) override {
+    if (IsQueueWorkCompleted)
+      *IsQueueWorkCompleted = false;
     CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo.Queue);
     CUresult Res = cuStreamQuery(Stream);
 
     // Not ready streams must be considered as successful operations.
     if (Res == CUDA_ERROR_NOT_READY)
       return Plugin::success();
-
+    
+    if (IsQueueWorkCompleted)
+      *IsQueueWorkCompleted = true;
     // Once the stream is synchronized and the operations completed (or an error
     // occurs), return it to stream pool and reset AsyncInfo. This is to make
     // sure the synchronization only works for its own tasks.
-    AsyncInfo.Queue = nullptr;
-    if (auto Err = CUDAStreamManager.returnResource(Stream))
-      return Err;
+    if (ReleaseQueue) {
+      AsyncInfo.Queue = nullptr;
+      if (auto Err = CUDAStreamManager.returnResource(Stream))
+        return Err;
+    }
 
     return Plugin::check(Res, "error in cuStreamQuery: %s");
   }
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 81fbb671aa88f..30fe612c4bad1 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -336,7 +336,11 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
 
   /// All functions are already synchronous. No need to do anything on this
   /// query function.
-  Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override {
+  Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
+                       bool ReleaseQueue,
+                       bool *IsQueueWorkCompleted) override {
+    if (IsQueueWorkCompleted)
+      *IsQueueWorkCompleted = true;
     return Plugin::success();
   }
 
diff --git a/offload/plugins-nextgen/level_zero/include/L0Device.h b/offload/plugins-nextgen/level_zero/include/L0Device.h
index d14e710ea0fcc..9bbe900c288af 100644
--- a/offload/plugins-nextgen/level_zero/include/L0Device.h
+++ b/offload/plugins-nextgen/level_zero/include/L0Device.h
@@ -576,7 +576,9 @@ class L0DeviceTy final : public GenericDeviceTy {
                      AsyncInfoWrapperTy &AsyncInfoWrapper) override;
   Error synchronizeImpl(__tgt_async_info &AsyncInfo,
                         bool ReleaseQueue) override;
-  Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override;
+  Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
+                       bool ReleaseQueue,
+                       bool *IsQueueWorkCompleted) override;
   Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size,
                        AsyncInfoWrapperTy &AsyncInfoWrapper) override;
   Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size,
diff --git a/offload/plugins-nextgen/level_zero/src/L0Device.cpp b/offload/plugins-nextgen/level_zero/src/L0Device.cpp
index 2cae1e4899f37..fbe154d45afc8 100644
--- a/offload/plugins-nextgen/level_zero/src/L0Device.cpp
+++ b/offload/plugins-nextgen/level_zero/src/L0Device.cpp
@@ -356,10 +356,16 @@ L0DeviceTy::hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) {
   return true;
 }
 
-Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) {
+Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo,
+                                 bool ReleaseQueue,
+                                 bool *IsQueueWorkCompleted) {
+  if (IsQueueWorkCompleted)
+      *IsQueueWorkCompleted = true;
   const bool IsAsync = AsyncInfo.Queue && asyncEnabled();
   if (!IsAsync)
     return Plugin::success();
+  if (IsQueueWorkCompleted)
+      *IsQueueWorkCompleted = false;
 
   auto &Plugin = getPlugin();
   auto *AsyncQueue = static_cast<AsyncQueueTy *>(AsyncInfo.Queue);
@@ -367,6 +373,9 @@ Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) {
   if (!AsyncQueue->WaitEvents.empty())
     return Plugin::success();
 
+  if (IsQueueWorkCompleted)
+      *IsQueueWorkCompleted = true;
+
   // Commit delayed USM2M copies.
   for (auto &USM2M : AsyncQueue->USM2MList) {
     std::copy_n(static_cast<const char *>(std::get<0>(USM2M)),
@@ -377,9 +386,11 @@ Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) {
     std::copy_n(static_cast<char *>(std::get<0>(H2M)), std::get<2>(H2M),
                 static_cast<char *>(std::get<1>(H2M)));
   }
-  Plugin.releaseAsyncQueue(AsyncQueue);
-  getStagingBuffer().reset();
-  AsyncInfo.Queue = nullptr;
+  if (ReleaseQueue) {
+    Plugin.releaseAsyncQueue(AsyncQueue);
+    getStagingBuffer().reset();
+    AsyncInfo.Queue = nullptr;
+  }
 
   return Plugin::success();
 }
diff --git a/offload/test/CMakeLists.txt b/offload/test/CMakeLists.txt
index 711621de9075d..e17bcfb4acaae 100644
--- a/offload/test/CMakeLists.txt
+++ b/offload/test/CMakeLists.txt
@@ -73,5 +73,4 @@ configure_lit_site_cfg(
 
 add_lit_testsuite(check-offload-unit "Running offload unittest suites"
   ${CMAKE_CURRENT_BINARY_DIR}/unit
-  EXCLUDE_FROM_CHECK_ALL
   DEPENDS LLVMOffload OffloadUnitTests)
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index 546d7dac692d3..fce38c5fd75ae 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -48,7 +48,8 @@ add_offload_unittest("queue"
     queue/olGetQueueInfo.cpp
     queue/olGetQueueInfoSize.cpp
     queue/olWaitEvents.cpp
-    queue/olLaunchHostFunction.cpp)
+    queue/olLaunchHostFunction.cpp
+    queue/olQueryQueue.cpp)
 
 add_offload_unittest("symbol"
     symbol/olGetSymbol.cpp
diff --git a/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
new file mode 100644
index 0000000000000..766269bab29ea
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
@@ -0,0 +1,24 @@
+//===------- Offload API tests - olQueryQueue ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olQueryQueueTest = OffloadQueueTest;
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olQueryQueueTest);
+
+TEST_P(olQueryQueueTest, SuccessEmptyAsyncQueue) {
+  ASSERT_SUCCESS(olQueryQueue(Queue, nullptr));
+}
+
+TEST_P(olQueryQueueTest, SuccessEmptyAsyncQueueCheckResult) {
+  bool IsQueueWorkCompleted;
+  ASSERT_SUCCESS(olQueryQueue(Queue, &IsQueueWorkCompleted));
+  ASSERT_TRUE(IsQueueWorkCompleted);
+}
diff --git a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
index 2e130af77984c..f07ebbdbaed82 100644
--- a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
+++ b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
@@ -16,7 +16,3 @@ OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olSyncQueueTest);
 TEST_P(olSyncQueueTest, SuccessEmptyQueue) {
   ASSERT_SUCCESS(olSyncQueue(Queue));
 }
-
-TEST_P(olSyncQueueTest, SuccessEmptyAsyncQueue) {
-  ASSERT_SUCCESS(olQueryQueue(Queue));
-}

>From 1fd674c50ae311123042c528f065a35eeb69acb5 Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Mon, 19 Jan 2026 23:08:29 -0800
Subject: [PATCH 5/6] Revert spurious change

---
 offload/test/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/offload/test/CMakeLists.txt b/offload/test/CMakeLists.txt
index e17bcfb4acaae..711621de9075d 100644
--- a/offload/test/CMakeLists.txt
+++ b/offload/test/CMakeLists.txt
@@ -73,4 +73,5 @@ configure_lit_site_cfg(
 
 add_lit_testsuite(check-offload-unit "Running offload unittest suites"
   ${CMAKE_CURRENT_BINARY_DIR}/unit
+  EXCLUDE_FROM_CHECK_ALL
   DEPENDS LLVMOffload OffloadUnitTests)

>From e323516d861f0ca5c1d99843b80a33581bd8c4f2 Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Tue, 20 Jan 2026 09:54:13 -0800
Subject: [PATCH 6/6] Address PR comments and fix formatting

---
 offload/liboffload/API/Queue.td                          | 5 ++---
 offload/liboffload/src/OffloadImpl.cpp                   | 3 ++-
 offload/plugins-nextgen/amdgpu/src/rtl.cpp               | 3 +--
 offload/plugins-nextgen/common/include/PluginInterface.h | 3 +--
 offload/plugins-nextgen/cuda/src/rtl.cpp                 | 5 ++---
 offload/plugins-nextgen/host/src/rtl.cpp                 | 3 +--
 offload/plugins-nextgen/level_zero/include/L0Device.h    | 3 +--
 offload/plugins-nextgen/level_zero/src/L0Device.cpp      | 9 ++++-----
 offload/unittests/OffloadAPI/queue/olQueryQueue.cpp      | 4 ++--
 9 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index daf1e5f2762a2..4008432375753 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -129,12 +129,11 @@ def olLaunchHostFunction : Function {
 def olQueryQueue : Function {
     let desc = "Query for queue work completion in a non-blocking manner.";
     let details = [
-      "The function checks if a queue work has completed enqueued work without blocking the calling thread.",
-      "If enqueued work has completed the function would perform internal queue cleanup."
+      "The function checks whether the queue has completed its enqueued work without blocking the calling thread."
     ];
     let params = [
         Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
-        Param<"bool *", "IsQueueWorkCompleted", " A flag indicating if a queue work has completed", PARAM_OUT_OPTIONAL>
+        Param<"bool *", "IsQueueWorkCompleted", "A flag indicating whether the queue work has completed", PARAM_OUT_OPTIONAL>
     ];
     let returns = [];
 }
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 1906c7a2e941e..cf38f7280ec37 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1233,7 +1233,8 @@ Error olMemUnregister_impl(ol_device_handle_t Device, void *Ptr) {
 
 Error olQueryQueue_impl(ol_queue_handle_t Queue, bool *IsQueueWorkCompleted) {
   if (Queue->AsyncInfo->Queue) {
-    if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo, false, IsQueueWorkCompleted))
+    if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo, false,
+                                                     IsQueueWorkCompleted))
       return Err;
   }
   return Error::success();
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 7e4a7ce26921c..4cc976dae1ed1 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2430,8 +2430,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
   }
 
   /// Query for the completion of the pending operations on the async info.
-  Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
-                       bool ReleaseQueue,
+  Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
                        bool *IsQueueWorkCompleted) override {
     if (IsQueueWorkCompleted)
       *IsQueueWorkCompleted = false;
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index be58b611d2c82..19db44cf04655 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -856,8 +856,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   /// structure in a non-blocking manner.
   Error queryAsync(__tgt_async_info *AsyncInfo, bool ReleaseQueue = true,
                    bool *IsQueueWorkCompleted = nullptr);
-  virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
-                               bool ReleaseQueue,
+  virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
                                bool *IsQueueWorkCompleted) = 0;
 
   /// Check whether the architecture supports VA management
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 318c9f970435b..657dec89670e7 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -792,8 +792,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
   }
 
   /// Query for the completion of the pending operations on the async info.
-  Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
-                       bool ReleaseQueue,
+  Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
                        bool *IsQueueWorkCompleted) override {
     if (IsQueueWorkCompleted)
       *IsQueueWorkCompleted = false;
@@ -803,7 +802,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
     // Not ready streams must be considered as successful operations.
     if (Res == CUDA_ERROR_NOT_READY)
       return Plugin::success();
-    
+
     if (IsQueueWorkCompleted)
       *IsQueueWorkCompleted = true;
     // Once the stream is synchronized and the operations completed (or an error
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 30fe612c4bad1..603379630ae8e 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -336,8 +336,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
 
   /// All functions are already synchronous. No need to do anything on this
   /// query function.
-  Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
-                       bool ReleaseQueue,
+  Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
                        bool *IsQueueWorkCompleted) override {
     if (IsQueueWorkCompleted)
       *IsQueueWorkCompleted = true;
diff --git a/offload/plugins-nextgen/level_zero/include/L0Device.h b/offload/plugins-nextgen/level_zero/include/L0Device.h
index 9bbe900c288af..001a41ba77d7b 100644
--- a/offload/plugins-nextgen/level_zero/include/L0Device.h
+++ b/offload/plugins-nextgen/level_zero/include/L0Device.h
@@ -576,8 +576,7 @@ class L0DeviceTy final : public GenericDeviceTy {
                      AsyncInfoWrapperTy &AsyncInfoWrapper) override;
   Error synchronizeImpl(__tgt_async_info &AsyncInfo,
                         bool ReleaseQueue) override;
-  Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
-                       bool ReleaseQueue,
+  Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
                        bool *IsQueueWorkCompleted) override;
   Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size,
                        AsyncInfoWrapperTy &AsyncInfoWrapper) override;
diff --git a/offload/plugins-nextgen/level_zero/src/L0Device.cpp b/offload/plugins-nextgen/level_zero/src/L0Device.cpp
index fbe154d45afc8..24dfbd6654730 100644
--- a/offload/plugins-nextgen/level_zero/src/L0Device.cpp
+++ b/offload/plugins-nextgen/level_zero/src/L0Device.cpp
@@ -356,16 +356,15 @@ L0DeviceTy::hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) {
   return true;
 }
 
-Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo,
-                                 bool ReleaseQueue,
+Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
                                  bool *IsQueueWorkCompleted) {
   if (IsQueueWorkCompleted)
-      *IsQueueWorkCompleted = true;
+    *IsQueueWorkCompleted = true;
   const bool IsAsync = AsyncInfo.Queue && asyncEnabled();
   if (!IsAsync)
     return Plugin::success();
   if (IsQueueWorkCompleted)
-      *IsQueueWorkCompleted = false;
+    *IsQueueWorkCompleted = false;
 
   auto &Plugin = getPlugin();
   auto *AsyncQueue = static_cast<AsyncQueueTy *>(AsyncInfo.Queue);
@@ -374,7 +373,7 @@ Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo,
     return Plugin::success();
 
   if (IsQueueWorkCompleted)
-      *IsQueueWorkCompleted = true;
+    *IsQueueWorkCompleted = true;
 
   // Commit delayed USM2M copies.
   for (auto &USM2M : AsyncQueue->USM2MList) {
diff --git a/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
index 766269bab29ea..4efd939f88bba 100644
--- a/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
+++ b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
@@ -1,10 +1,10 @@
-//===------- Offload API tests - olQueryQueue ------------------------------===//
+//===------- Offload API tests - olQueryQueue ----------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===----------------------------------------------------------------------===//
+//===--------------------------------------------------------------------===//
 
 #include "../common/Fixtures.hpp"
 #include <OffloadAPI.h>