[llvm] [OFFLOAD] Add asynchronous queue query API for libomptarget migration (PR #172231)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 20 09:54:26 PST 2026
https://github.com/fineg74 updated https://github.com/llvm/llvm-project/pull/172231
>From a1080caff2f5cb9aced3b067ab2c25af5846c165 Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Fri, 12 Dec 2025 16:49:28 -0800
Subject: [PATCH 1/6] add QueryAsync API
---
offload/liboffload/API/Queue.td | 9 +++++++++
offload/liboffload/src/OffloadImpl.cpp | 9 +++++++++
2 files changed, 18 insertions(+)
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index ededa9cc92fef..75df8f103e9f2 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -125,3 +125,12 @@ def olLaunchHostFunction : Function {
];
let returns = [];
}
+
+def olQueryAsync : Function {
+ let desc = "Query for device/queue/event based completion on in a non-blocking manner.";
+ let details = [];
+ let params = [
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+ ];
+ let returns = [];
+}
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index eab9627217ca8..b46546414c602 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1214,5 +1214,14 @@ Error olLaunchHostFunction_impl(ol_queue_handle_t Queue,
Queue->AsyncInfo);
}
+Error olQueryAsync_impl(ol_queue_handle_t Queue) {
+ if (Queue->AsyncInfo->Queue) {
+ if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo))
+ return Err;
+ }
+
+ return Error::success();
+}
+
} // namespace offload
} // namespace llvm
>From d40ea9ef40b8fc50619ef95e0c8dc2cc93a0d2d1 Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Thu, 15 Jan 2026 17:12:55 -0800
Subject: [PATCH 2/6] Add test and more API details
---
offload/liboffload/API/Queue.td | 7 +++++--
offload/liboffload/src/OffloadImpl.cpp | 1 +
offload/unittests/OffloadAPI/queue/olSyncQueue.cpp | 4 ++++
3 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index 75df8f103e9f2..fdc73a1b15388 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -127,8 +127,11 @@ def olLaunchHostFunction : Function {
}
def olQueryAsync : Function {
- let desc = "Query for device/queue/event based completion on in a non-blocking manner.";
- let details = [];
+ let desc = "Query for queue work completion on in a non-blocking manner.";
+ let details = [
+ "The function checks if a queue work has completed without blocking the calling thread.",
+ "If there work has completed the function would perform requred cleanup."
+ ];
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
];
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index b1fe1abed768a..94507dfcd6b20 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1236,6 +1236,7 @@ Error olQueryAsync_impl(ol_queue_handle_t Queue) {
if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo))
return Err;
}
+ return Error::success();
}
} // namespace offload
diff --git a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
index f07ebbdbaed82..866b6b33ec657 100644
--- a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
+++ b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
@@ -16,3 +16,7 @@ OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olSyncQueueTest);
TEST_P(olSyncQueueTest, SuccessEmptyQueue) {
ASSERT_SUCCESS(olSyncQueue(Queue));
}
+
+TEST_P(olSyncQueueTest, SuccessEmptyAsyncQueue) {
+ ASSERT_SUCCESS(olSyncQueue(Queue));
+}
>From 3d1f3b1dd0bfbd8430089208dc2d7045bc8ea6a4 Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Fri, 16 Jan 2026 12:41:05 -0800
Subject: [PATCH 3/6] Address PR comments
---
offload/liboffload/API/Queue.td | 8 ++++----
offload/liboffload/src/OffloadImpl.cpp | 2 +-
offload/unittests/OffloadAPI/queue/olSyncQueue.cpp | 2 +-
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index fdc73a1b15388..d6653debffa1f 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -126,11 +126,11 @@ def olLaunchHostFunction : Function {
let returns = [];
}
-def olQueryAsync : Function {
- let desc = "Query for queue work completion on in a non-blocking manner.";
+def olQueryQueue : Function {
+ let desc = "Query for queue work completion in a non-blocking manner.";
let details = [
- "The function checks if a queue work has completed without blocking the calling thread.",
- "If there work has completed the function would perform requred cleanup."
+ "The function checks if a queue work has completed enqueued work without blocking the calling thread.",
+ "If enqueued work has completed the function would perform internal queue cleanup."
];
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 94507dfcd6b20..9e0691252cd89 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1231,7 +1231,7 @@ Error olMemUnregister_impl(ol_device_handle_t Device, void *Ptr) {
return Device->Device->dataUnlock(Ptr);
}
-Error olQueryAsync_impl(ol_queue_handle_t Queue) {
+Error olQueryQueue_impl(ol_queue_handle_t Queue) {
if (Queue->AsyncInfo->Queue) {
if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo))
return Err;
diff --git a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
index 866b6b33ec657..2e130af77984c 100644
--- a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
+++ b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
@@ -18,5 +18,5 @@ TEST_P(olSyncQueueTest, SuccessEmptyQueue) {
}
TEST_P(olSyncQueueTest, SuccessEmptyAsyncQueue) {
- ASSERT_SUCCESS(olSyncQueue(Queue));
+ ASSERT_SUCCESS(olQueryQueue(Queue));
}
>From 3046de466fe10f40187efd65d27ed620053ef7dd Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Mon, 19 Jan 2026 23:06:26 -0800
Subject: [PATCH 4/6] Address PR comments
---
offload/liboffload/API/Queue.td | 3 ++-
offload/liboffload/src/OffloadImpl.cpp | 4 ++--
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 15 +++++++++---
.../common/include/PluginInterface.h | 7 ++++--
.../common/src/PluginInterface.cpp | 6 +++--
offload/plugins-nextgen/cuda/src/rtl.cpp | 18 ++++++++++----
offload/plugins-nextgen/host/src/rtl.cpp | 6 ++++-
.../level_zero/include/L0Device.h | 4 +++-
.../level_zero/src/L0Device.cpp | 19 +++++++++++----
offload/test/CMakeLists.txt | 1 -
offload/unittests/OffloadAPI/CMakeLists.txt | 3 ++-
.../OffloadAPI/queue/olQueryQueue.cpp | 24 +++++++++++++++++++
.../OffloadAPI/queue/olSyncQueue.cpp | 4 ----
13 files changed, 87 insertions(+), 27 deletions(-)
create mode 100644 offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index d6653debffa1f..daf1e5f2762a2 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -133,7 +133,8 @@ def olQueryQueue : Function {
"If enqueued work has completed the function would perform internal queue cleanup."
];
let params = [
- Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+ Param<"bool *", "IsQueueWorkCompleted", " A flag indicating if a queue work has completed", PARAM_OUT_OPTIONAL>
];
let returns = [];
}
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 9e0691252cd89..1906c7a2e941e 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1231,9 +1231,9 @@ Error olMemUnregister_impl(ol_device_handle_t Device, void *Ptr) {
return Device->Device->dataUnlock(Ptr);
}
-Error olQueryQueue_impl(ol_queue_handle_t Queue) {
+Error olQueryQueue_impl(ol_queue_handle_t Queue, bool *IsQueueWorkCompleted) {
if (Queue->AsyncInfo->Queue) {
- if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo))
+ if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo, false, IsQueueWorkCompleted))
return Err;
}
return Error::success();
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 008fef6617a3f..7e4a7ce26921c 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2430,7 +2430,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}
/// Query for the completion of the pending operations on the async info.
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override {
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
+ bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) override {
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = false;
AMDGPUStreamTy *Stream =
reinterpret_cast<AMDGPUStreamTy *>(AsyncInfo.Queue);
assert(Stream && "Invalid stream");
@@ -2443,11 +2447,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (!(*CompletedOrErr))
return Plugin::success();
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = true;
// Once the stream is completed, return it to stream pool and reset
// AsyncInfo. This is to make sure the synchronization only works for its
// own tasks.
- AsyncInfo.Queue = nullptr;
- return AMDGPUStreamManager.returnResource(Stream);
+ if (ReleaseQueue) {
+ AsyncInfo.Queue = nullptr;
+ return AMDGPUStreamManager.returnResource(Stream);
+ }
+ return Plugin::success();
}
/// Pin the host buffer and return the device pointer that should be used for
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index caf86a921a914..be58b611d2c82 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -854,8 +854,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Query for the completion of the pending operations on the __tgt_async_info
/// structure in a non-blocking manner.
- Error queryAsync(__tgt_async_info *AsyncInfo);
- virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo) = 0;
+ Error queryAsync(__tgt_async_info *AsyncInfo, bool ReleaseQueue = true,
+ bool *IsQueueWorkCompleted = nullptr);
+ virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
+ bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) = 0;
/// Check whether the architecture supports VA management
virtual bool supportVAManagement() const { return false; }
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 4ec836636b57d..2309298c125c2 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1198,12 +1198,14 @@ Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo,
return Plugin::success();
}
-Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo) {
+Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo,
+ bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) {
if (!AsyncInfo || !AsyncInfo->Queue)
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"invalid async info queue");
- return queryAsyncImpl(*AsyncInfo);
+ return queryAsyncImpl(*AsyncInfo, ReleaseQueue, IsQueueWorkCompleted);
}
Error GenericDeviceTy::memoryVAMap(void **Addr, void *VAddr, size_t *RSize) {
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 568f797058b46..318c9f970435b 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -792,20 +792,28 @@ struct CUDADeviceTy : public GenericDeviceTy {
}
/// Query for the completion of the pending operations on the async info.
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override {
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
+ bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) override {
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = false;
CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo.Queue);
CUresult Res = cuStreamQuery(Stream);
// Not ready streams must be considered as successful operations.
if (Res == CUDA_ERROR_NOT_READY)
return Plugin::success();
-
+
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = true;
// Once the stream is synchronized and the operations completed (or an error
// occurs), return it to stream pool and reset AsyncInfo. This is to make
// sure the synchronization only works for its own tasks.
- AsyncInfo.Queue = nullptr;
- if (auto Err = CUDAStreamManager.returnResource(Stream))
- return Err;
+ if (ReleaseQueue) {
+ AsyncInfo.Queue = nullptr;
+ if (auto Err = CUDAStreamManager.returnResource(Stream))
+ return Err;
+ }
return Plugin::check(Res, "error in cuStreamQuery: %s");
}
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 81fbb671aa88f..30fe612c4bad1 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -336,7 +336,11 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
/// All functions are already synchronous. No need to do anything on this
/// query function.
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override {
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
+ bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) override {
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = true;
return Plugin::success();
}
diff --git a/offload/plugins-nextgen/level_zero/include/L0Device.h b/offload/plugins-nextgen/level_zero/include/L0Device.h
index d14e710ea0fcc..9bbe900c288af 100644
--- a/offload/plugins-nextgen/level_zero/include/L0Device.h
+++ b/offload/plugins-nextgen/level_zero/include/L0Device.h
@@ -576,7 +576,9 @@ class L0DeviceTy final : public GenericDeviceTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) override;
Error synchronizeImpl(__tgt_async_info &AsyncInfo,
bool ReleaseQueue) override;
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override;
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
+ bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) override;
Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size,
AsyncInfoWrapperTy &AsyncInfoWrapper) override;
Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size,
diff --git a/offload/plugins-nextgen/level_zero/src/L0Device.cpp b/offload/plugins-nextgen/level_zero/src/L0Device.cpp
index 2cae1e4899f37..fbe154d45afc8 100644
--- a/offload/plugins-nextgen/level_zero/src/L0Device.cpp
+++ b/offload/plugins-nextgen/level_zero/src/L0Device.cpp
@@ -356,10 +356,16 @@ L0DeviceTy::hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) {
return true;
}
-Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) {
+Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo,
+ bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) {
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = true;
const bool IsAsync = AsyncInfo.Queue && asyncEnabled();
if (!IsAsync)
return Plugin::success();
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = false;
auto &Plugin = getPlugin();
auto *AsyncQueue = static_cast<AsyncQueueTy *>(AsyncInfo.Queue);
@@ -367,6 +373,9 @@ Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) {
if (!AsyncQueue->WaitEvents.empty())
return Plugin::success();
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = true;
+
// Commit delayed USM2M copies.
for (auto &USM2M : AsyncQueue->USM2MList) {
std::copy_n(static_cast<const char *>(std::get<0>(USM2M)),
@@ -377,9 +386,11 @@ Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) {
std::copy_n(static_cast<char *>(std::get<0>(H2M)), std::get<2>(H2M),
static_cast<char *>(std::get<1>(H2M)));
}
- Plugin.releaseAsyncQueue(AsyncQueue);
- getStagingBuffer().reset();
- AsyncInfo.Queue = nullptr;
+ if (ReleaseQueue) {
+ Plugin.releaseAsyncQueue(AsyncQueue);
+ getStagingBuffer().reset();
+ AsyncInfo.Queue = nullptr;
+ }
return Plugin::success();
}
diff --git a/offload/test/CMakeLists.txt b/offload/test/CMakeLists.txt
index 711621de9075d..e17bcfb4acaae 100644
--- a/offload/test/CMakeLists.txt
+++ b/offload/test/CMakeLists.txt
@@ -73,5 +73,4 @@ configure_lit_site_cfg(
add_lit_testsuite(check-offload-unit "Running offload unittest suites"
${CMAKE_CURRENT_BINARY_DIR}/unit
- EXCLUDE_FROM_CHECK_ALL
DEPENDS LLVMOffload OffloadUnitTests)
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index 546d7dac692d3..fce38c5fd75ae 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -48,7 +48,8 @@ add_offload_unittest("queue"
queue/olGetQueueInfo.cpp
queue/olGetQueueInfoSize.cpp
queue/olWaitEvents.cpp
- queue/olLaunchHostFunction.cpp)
+ queue/olLaunchHostFunction.cpp
+ queue/olQueryQueue.cpp)
add_offload_unittest("symbol"
symbol/olGetSymbol.cpp
diff --git a/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
new file mode 100644
index 0000000000000..766269bab29ea
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
@@ -0,0 +1,24 @@
+//===------- Offload API tests - olQueryQueue ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olQueryQueueTest = OffloadQueueTest;
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olQueryQueueTest);
+
+TEST_P(olQueryQueueTest, SuccessEmptyAsyncQueue) {
+ ASSERT_SUCCESS(olQueryQueue(Queue, nullptr));
+}
+
+TEST_P(olQueryQueueTest, SuccessEmptyAsyncQueueCheckResult) {
+ bool IsQueueWorkCompleted;
+ ASSERT_SUCCESS(olQueryQueue(Queue, &IsQueueWorkCompleted));
+ ASSERT_TRUE(IsQueueWorkCompleted);
+}
diff --git a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
index 2e130af77984c..f07ebbdbaed82 100644
--- a/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
+++ b/offload/unittests/OffloadAPI/queue/olSyncQueue.cpp
@@ -16,7 +16,3 @@ OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olSyncQueueTest);
TEST_P(olSyncQueueTest, SuccessEmptyQueue) {
ASSERT_SUCCESS(olSyncQueue(Queue));
}
-
-TEST_P(olSyncQueueTest, SuccessEmptyAsyncQueue) {
- ASSERT_SUCCESS(olQueryQueue(Queue));
-}
>From 1fd674c50ae311123042c528f065a35eeb69acb5 Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Mon, 19 Jan 2026 23:08:29 -0800
Subject: [PATCH 5/6] Revert spurious change
---
offload/test/CMakeLists.txt | 1 +
1 file changed, 1 insertion(+)
diff --git a/offload/test/CMakeLists.txt b/offload/test/CMakeLists.txt
index e17bcfb4acaae..711621de9075d 100644
--- a/offload/test/CMakeLists.txt
+++ b/offload/test/CMakeLists.txt
@@ -73,4 +73,5 @@ configure_lit_site_cfg(
add_lit_testsuite(check-offload-unit "Running offload unittest suites"
${CMAKE_CURRENT_BINARY_DIR}/unit
+ EXCLUDE_FROM_CHECK_ALL
DEPENDS LLVMOffload OffloadUnitTests)
>From e323516d861f0ca5c1d99843b80a33581bd8c4f2 Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Tue, 20 Jan 2026 09:54:13 -0800
Subject: [PATCH 6/6] Address PR comments and fix formatting
---
offload/liboffload/API/Queue.td | 5 ++---
offload/liboffload/src/OffloadImpl.cpp | 3 ++-
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 3 +--
offload/plugins-nextgen/common/include/PluginInterface.h | 3 +--
offload/plugins-nextgen/cuda/src/rtl.cpp | 5 ++---
offload/plugins-nextgen/host/src/rtl.cpp | 3 +--
offload/plugins-nextgen/level_zero/include/L0Device.h | 3 +--
offload/plugins-nextgen/level_zero/src/L0Device.cpp | 9 ++++-----
offload/unittests/OffloadAPI/queue/olQueryQueue.cpp | 4 ++--
9 files changed, 16 insertions(+), 22 deletions(-)
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index daf1e5f2762a2..4008432375753 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -129,12 +129,11 @@ def olLaunchHostFunction : Function {
def olQueryQueue : Function {
let desc = "Query for queue work completion in a non-blocking manner.";
let details = [
- "The function checks if a queue work has completed enqueued work without blocking the calling thread.",
- "If enqueued work has completed the function would perform internal queue cleanup."
+ "The function checks if a queue work has completed enqueued work without blocking the calling thread."
];
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
- Param<"bool *", "IsQueueWorkCompleted", " A flag indicating if a queue work has completed", PARAM_OUT_OPTIONAL>
+ Param<"bool *", "IsQueueWorkCompleted", " A flag indicating if the queue work has completed", PARAM_OUT_OPTIONAL>
];
let returns = [];
}
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 1906c7a2e941e..cf38f7280ec37 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1233,7 +1233,8 @@ Error olMemUnregister_impl(ol_device_handle_t Device, void *Ptr) {
Error olQueryQueue_impl(ol_queue_handle_t Queue, bool *IsQueueWorkCompleted) {
if (Queue->AsyncInfo->Queue) {
- if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo, false, IsQueueWorkCompleted))
+ if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo, false,
+ IsQueueWorkCompleted))
return Err;
}
return Error::success();
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 7e4a7ce26921c..4cc976dae1ed1 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2430,8 +2430,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}
/// Query for the completion of the pending operations on the async info.
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
- bool ReleaseQueue,
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
bool *IsQueueWorkCompleted) override {
if (IsQueueWorkCompleted)
*IsQueueWorkCompleted = false;
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index be58b611d2c82..19db44cf04655 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -856,8 +856,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// structure in a non-blocking manner.
Error queryAsync(__tgt_async_info *AsyncInfo, bool ReleaseQueue = true,
bool *IsQueueWorkCompleted = nullptr);
- virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
- bool ReleaseQueue,
+ virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
bool *IsQueueWorkCompleted) = 0;
/// Check whether the architecture supports VA management
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 318c9f970435b..657dec89670e7 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -792,8 +792,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
}
/// Query for the completion of the pending operations on the async info.
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
- bool ReleaseQueue,
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
bool *IsQueueWorkCompleted) override {
if (IsQueueWorkCompleted)
*IsQueueWorkCompleted = false;
@@ -803,7 +802,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
// Not ready streams must be considered as successful operations.
if (Res == CUDA_ERROR_NOT_READY)
return Plugin::success();
-
+
if (IsQueueWorkCompleted)
*IsQueueWorkCompleted = true;
// Once the stream is synchronized and the operations completed (or an error
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 30fe612c4bad1..603379630ae8e 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -336,8 +336,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
/// All functions are already synchronous. No need to do anything on this
/// query function.
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
- bool ReleaseQueue,
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
bool *IsQueueWorkCompleted) override {
if (IsQueueWorkCompleted)
*IsQueueWorkCompleted = true;
diff --git a/offload/plugins-nextgen/level_zero/include/L0Device.h b/offload/plugins-nextgen/level_zero/include/L0Device.h
index 9bbe900c288af..001a41ba77d7b 100644
--- a/offload/plugins-nextgen/level_zero/include/L0Device.h
+++ b/offload/plugins-nextgen/level_zero/include/L0Device.h
@@ -576,8 +576,7 @@ class L0DeviceTy final : public GenericDeviceTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) override;
Error synchronizeImpl(__tgt_async_info &AsyncInfo,
bool ReleaseQueue) override;
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo,
- bool ReleaseQueue,
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
bool *IsQueueWorkCompleted) override;
Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size,
AsyncInfoWrapperTy &AsyncInfoWrapper) override;
diff --git a/offload/plugins-nextgen/level_zero/src/L0Device.cpp b/offload/plugins-nextgen/level_zero/src/L0Device.cpp
index fbe154d45afc8..24dfbd6654730 100644
--- a/offload/plugins-nextgen/level_zero/src/L0Device.cpp
+++ b/offload/plugins-nextgen/level_zero/src/L0Device.cpp
@@ -356,16 +356,15 @@ L0DeviceTy::hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) {
return true;
}
-Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo,
- bool ReleaseQueue,
+Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
bool *IsQueueWorkCompleted) {
if (IsQueueWorkCompleted)
- *IsQueueWorkCompleted = true;
+ *IsQueueWorkCompleted = true;
const bool IsAsync = AsyncInfo.Queue && asyncEnabled();
if (!IsAsync)
return Plugin::success();
if (IsQueueWorkCompleted)
- *IsQueueWorkCompleted = false;
+ *IsQueueWorkCompleted = false;
auto &Plugin = getPlugin();
auto *AsyncQueue = static_cast<AsyncQueueTy *>(AsyncInfo.Queue);
@@ -374,7 +373,7 @@ Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo,
return Plugin::success();
if (IsQueueWorkCompleted)
- *IsQueueWorkCompleted = true;
+ *IsQueueWorkCompleted = true;
// Commit delayed USM2M copies.
for (auto &USM2M : AsyncQueue->USM2MList) {
diff --git a/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
index 766269bab29ea..4efd939f88bba 100644
--- a/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
+++ b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
@@ -1,10 +1,10 @@
-//===------- Offload API tests - olQueryQueue ------------------------------===//
+//===------- Offload API tests - olQueryQueue ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
-//===----------------------------------------------------------------------===//
+//===--------------------------------------------------------------------===//
#include "../common/Fixtures.hpp"
#include <OffloadAPI.h>
More information about the llvm-commits
mailing list