[llvm] [Offload] Implement the remaining initial Offload API (PR #122106)
Callum Fare via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 13 08:47:50 PST 2025
https://github.com/callumfare updated https://github.com/llvm/llvm-project/pull/122106
>From 7cbe788ddc0de682ce0f939caf4619e99889f992 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Wed, 11 Dec 2024 12:08:44 +0000
Subject: [PATCH 01/16] WIP: Implement olMemAlloc, olMemFree
---
offload/liboffload/API/Memory.td | 45 +++++++++
offload/liboffload/API/OffloadAPI.td | 1 +
.../liboffload/include/generated/OffloadAPI.h | 95 +++++++++++++++++++
.../include/generated/OffloadEntryPoints.inc | 93 ++++++++++++++++++
.../include/generated/OffloadFuncs.inc | 4 +
.../generated/OffloadImplFuncDecls.inc | 7 ++
.../include/generated/OffloadPrint.hpp | 53 +++++++++++
offload/liboffload/src/OffloadImpl.cpp | 35 +++++++
8 files changed, 333 insertions(+)
create mode 100644 offload/liboffload/API/Memory.td
diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td
new file mode 100644
index 0000000000000..8cfaf70311e34
--- /dev/null
+++ b/offload/liboffload/API/Memory.td
@@ -0,0 +1,45 @@
+//===-- Memory.td - Memory definitions for Offload ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to memory allocations
+//
+//===----------------------------------------------------------------------===//
+
+def : Enum {
+ let name = "ol_alloc_type_t";
+ let desc = "Represents the type of allocation made with olMemAlloc";
+ let etors = [
+ Etor<"HOST", "Host allocation">,
+ Etor<"DEVICE", "Device allocation">,
+ Etor<"SHARED", "Shared allocation">
+ ];
+}
+
+def : Function {
+ let name = "olMemAlloc";
+ let desc = "Creates a memory allocation on the specified device";
+ let params = [
+ Param<"ol_device_handle_t", "Device", "handle of the device to allocate on", PARAM_IN>,
+ Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>,
+ Param<"size_t", "Size", "size of the allocation in bytes", PARAM_IN>,
+ Param<"size_t", "Aligment", "alignment of the allocation in bytes", PARAM_IN>,
+ Param<"void**", "AllocationOut", "output for the allocated pointer", PARAM_OUT>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olMemFree";
+ let desc = "Frees a memory allocation previously made by olMemAlloc";
+ let params = [
+ Param<"ol_device_handle_t", "Device", "handle of the device the allocation was made on", PARAM_IN>,
+ Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>,
+ Param<"void*", "Address", "address of the allocation to free", PARAM_IN>,
+ ];
+ let returns = [];
+}
diff --git a/offload/liboffload/API/OffloadAPI.td b/offload/liboffload/API/OffloadAPI.td
index 8a0c3c4058122..a609cc7ac80b4 100644
--- a/offload/liboffload/API/OffloadAPI.td
+++ b/offload/liboffload/API/OffloadAPI.td
@@ -13,3 +13,4 @@ include "APIDefs.td"
include "Common.td"
include "Platform.td"
include "Device.td"
+include "Memory.td"
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 11fcc96625ab8..81f3a8e0201ba 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -460,6 +460,67 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize(
// [out] pointer to the number of bytes required to store the query
size_t *PropSizeRet);
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Represents the type of allocation made with olMemAlloc
+typedef enum ol_alloc_type_t {
+ /// Host allocation
+ OL_ALLOC_TYPE_HOST = 0,
+ /// Device allocation
+ OL_ALLOC_TYPE_DEVICE = 1,
+ /// Shared allocation
+ OL_ALLOC_TYPE_SHARED = 2,
+ /// @cond
+ OL_ALLOC_TYPE_FORCE_UINT32 = 0x7fffffff
+ /// @endcond
+
+} ol_alloc_type_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Creates a memory allocation on the specified device
+///
+/// @details
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Device`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == AllocationOut`
+OL_APIEXPORT ol_result_t OL_APICALL olMemAlloc(
+ // [in] handle of the device to allocate on
+ ol_device_handle_t Device,
+ // [in] type of the allocation
+ ol_alloc_type_t Type,
+ // [in] size of the allocation in bytes
+ size_t Size,
+ // [in] alignment of the allocation in bytes
+ size_t Aligment,
+ // [out] output for the allocated pointer
+ void **AllocationOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Frees a memory allocation previously made by olMemAlloc
+///
+/// @details
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Device`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == Address`
+OL_APIEXPORT ol_result_t OL_APICALL olMemFree(
+ // [in] handle of the device to allocate on
+ ol_device_handle_t Device,
+ // [in] type of the allocation
+ ol_alloc_type_t Type,
+ // [in] address of the allocation to free
+ void *Address);
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for olGetPlatform
/// @details Each entry is a pointer to the parameter passed to the function;
@@ -530,6 +591,26 @@ typedef struct ol_get_device_info_size_params_t {
size_t **pPropSizeRet;
} ol_get_device_info_size_params_t;
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olMemAlloc
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_mem_alloc_params_t {
+ ol_device_handle_t *pDevice;
+ ol_alloc_type_t *pType;
+ size_t *pSize;
+ size_t *pAligment;
+ void ***pAllocationOut;
+} ol_mem_alloc_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olMemFree
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_mem_free_params_t {
+ ol_device_handle_t *pDevice;
+ ol_alloc_type_t *pType;
+ void **pAddress;
+} ol_mem_free_params_t;
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Variant of olInit that also sets source code location information
/// @details See also ::olInit
@@ -605,6 +686,20 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSizeWithCodeLoc(
ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet,
ol_code_location_t *CodeLocation);
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olMemAlloc that also sets source code location information
+/// @details See also ::olMemAlloc
+OL_APIEXPORT ol_result_t OL_APICALL olMemAllocWithCodeLoc(
+ ol_device_handle_t Device, ol_alloc_type_t Type, size_t Size,
+ size_t Aligment, void **AllocationOut, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olMemFree that also sets source code location information
+/// @details See also ::olMemFree
+OL_APIEXPORT ol_result_t OL_APICALL
+olMemFreeWithCodeLoc(ol_device_handle_t Device, ol_alloc_type_t Type,
+ void *Address, ol_code_location_t *CodeLocation);
+
#if defined(__cplusplus)
} // extern "C"
#endif
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 49c1c8169615e..08060dae80f03 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -439,3 +439,96 @@ ol_result_t olGetDeviceInfoSizeWithCodeLoc(ol_device_handle_t Device,
currentCodeLocation() = nullptr;
return Result;
}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olMemAlloc_val(ol_device_handle_t Device, ol_alloc_type_t Type,
+ size_t Size, size_t Aligment,
+ void **AllocationOut) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Device) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == AllocationOut) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olMemAlloc_impl(Device, Type, Size, Aligment, AllocationOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olMemAlloc(ol_device_handle_t Device,
+ ol_alloc_type_t Type,
+ size_t Size, size_t Aligment,
+ void **AllocationOut) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olMemAlloc";
+ }
+
+ ol_result_t Result =
+ olMemAlloc_val(Device, Type, Size, Aligment, AllocationOut);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_mem_alloc_params_t Params = {&Device, &Type, &Size, &Aligment,
+ &AllocationOut};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olMemAllocWithCodeLoc(ol_device_handle_t Device,
+ ol_alloc_type_t Type, size_t Size,
+ size_t Aligment, void **AllocationOut,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olMemAlloc(Device, Type, Size, Aligment, AllocationOut);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olMemFree_val(ol_device_handle_t Device, ol_alloc_type_t Type,
+ void *Address) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Device) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == Address) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olMemFree_impl(Device, Type, Address);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olMemFree(ol_device_handle_t Device,
+ ol_alloc_type_t Type,
+ void *Address) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olMemFree";
+ }
+
+ ol_result_t Result = olMemFree_val(Device, Type, Address);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_mem_free_params_t Params = {&Device, &Type, &Address};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olMemFreeWithCodeLoc(ol_device_handle_t Device,
+ ol_alloc_type_t Type, void *Address,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olMemFree(Device, Type, Address);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index 48115493c790f..26120f18279dc 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -20,6 +20,8 @@ OFFLOAD_FUNC(olGetDeviceCount)
OFFLOAD_FUNC(olGetDevice)
OFFLOAD_FUNC(olGetDeviceInfo)
OFFLOAD_FUNC(olGetDeviceInfoSize)
+OFFLOAD_FUNC(olMemAlloc)
+OFFLOAD_FUNC(olMemFree)
OFFLOAD_FUNC(olInitWithCodeLoc)
OFFLOAD_FUNC(olShutDownWithCodeLoc)
OFFLOAD_FUNC(olGetPlatformWithCodeLoc)
@@ -30,5 +32,7 @@ OFFLOAD_FUNC(olGetDeviceCountWithCodeLoc)
OFFLOAD_FUNC(olGetDeviceWithCodeLoc)
OFFLOAD_FUNC(olGetDeviceInfoWithCodeLoc)
OFFLOAD_FUNC(olGetDeviceInfoSizeWithCodeLoc)
+OFFLOAD_FUNC(olMemAllocWithCodeLoc)
+OFFLOAD_FUNC(olMemFreeWithCodeLoc)
#undef OFFLOAD_FUNC
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 5b26b2653a05d..f0a96081fd243 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -36,3 +36,10 @@ ol_impl_result_t olGetDeviceInfo_impl(ol_device_handle_t Device,
ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
ol_device_info_t PropName,
size_t *PropSizeRet);
+
+ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
+ ol_alloc_type_t Type, size_t Size,
+ size_t Aligment, void **AllocationOut);
+
+ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
+ void *Address);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 8981bb054a4cb..cff754237568e 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -31,6 +31,7 @@ inline std::ostream &operator<<(std::ostream &os,
enum ol_platform_backend_t value);
inline std::ostream &operator<<(std::ostream &os, enum ol_device_type_t value);
inline std::ostream &operator<<(std::ostream &os, enum ol_device_info_t value);
+inline std::ostream &operator<<(std::ostream &os, enum ol_alloc_type_t value);
///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ol_errc_t type
@@ -274,6 +275,26 @@ inline void printTagged(std::ostream &os, const void *ptr,
break;
}
}
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print operator for the ol_alloc_type_t type
+/// @returns std::ostream &
+inline std::ostream &operator<<(std::ostream &os, enum ol_alloc_type_t value) {
+ switch (value) {
+ case OL_ALLOC_TYPE_HOST:
+ os << "OL_ALLOC_TYPE_HOST";
+ break;
+ case OL_ALLOC_TYPE_DEVICE:
+ os << "OL_ALLOC_TYPE_DEVICE";
+ break;
+ case OL_ALLOC_TYPE_SHARED:
+ os << "OL_ALLOC_TYPE_SHARED";
+ break;
+ default:
+ os << "unknown enumerator";
+ break;
+ }
+ return os;
+}
inline std::ostream &operator<<(std::ostream &os,
const ol_error_struct_t *Err) {
@@ -402,6 +423,38 @@ operator<<(std::ostream &os,
return os;
}
+inline std::ostream &operator<<(std::ostream &os,
+ const struct ol_mem_alloc_params_t *params) {
+ os << ".Device = ";
+ printPtr(os, *params->pDevice);
+ os << ", ";
+ os << ".Type = ";
+ os << *params->pType;
+ os << ", ";
+ os << ".Size = ";
+ os << *params->pSize;
+ os << ", ";
+ os << ".Aligment = ";
+ os << *params->pAligment;
+ os << ", ";
+ os << ".AllocationOut = ";
+ printPtr(os, *params->pAllocationOut);
+ return os;
+}
+
+inline std::ostream &operator<<(std::ostream &os,
+ const struct ol_mem_free_params_t *params) {
+ os << ".Device = ";
+ printPtr(os, *params->pDevice);
+ os << ", ";
+ os << ".Type = ";
+ os << *params->pType;
+ os << ", ";
+ os << ".Address = ";
+ printPtr(os, *params->pAddress);
+ return os;
+}
+
///////////////////////////////////////////////////////////////////////////////
// @brief Print pointer value
template <typename T>
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 457f1053f1634..3e609ed03917f 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -245,3 +245,38 @@ ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
size_t *PropSizeRet) {
return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet);
}
+
+// Translates an Offload API allocation kind into the plugin interface's
+// TargetAllocTy value.
+TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) {
+ if (Type == OL_ALLOC_TYPE_HOST)
+ return TARGET_ALLOC_HOST;
+ if (Type == OL_ALLOC_TYPE_DEVICE)
+ return TARGET_ALLOC_DEVICE;
+ // OL_ALLOC_TYPE_SHARED and any unrecognised value map to a shared
+ // allocation.
+ return TARGET_ALLOC_SHARED;
+}
+
+// Allocates Size bytes of the requested kind through the device's plugin and
+// stores the resulting pointer in *AllocationOut. The alignment argument is
+// unnamed and currently ignored. Returns OL_ERRC_OUT_OF_RESOURCES if the
+// plugin allocation fails.
+ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
+ ol_alloc_type_t Type, size_t Size, size_t,
+ void **AllocationOut) {
+ auto Alloc =
+ Device->Device.dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
+ if (!Alloc) {
+ // dataAlloc returns an llvm::Expected; in the failure state its Error must
+ // be taken and handled before destruction, otherwise LLVM aborts in builds
+ // with error checking enabled.
+ llvm::consumeError(Alloc.takeError());
+ return {OL_ERRC_OUT_OF_RESOURCES,
+ formatv("Could not create allocation on device {0}", Device).str()};
+ }
+
+ *AllocationOut = *Alloc;
+ return OL_SUCCESS;
+}
+
+// Releases Address, an allocation of kind Type, through the device's plugin.
+// Returns OL_ERRC_OUT_OF_RESOURCES if the plugin reports a failure.
+ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
+ void *Address) {
+ auto Res = Device->Device.dataDelete(Address, convertOlToPluginAllocTy(Type));
+ if (Res) {
+ // A failing llvm::Error must be handled before it is destroyed; testing it
+ // with operator bool alone is not enough and aborts in checked builds.
+ llvm::consumeError(std::move(Res));
+ return {OL_ERRC_OUT_OF_RESOURCES, "Could not free allocation"};
+ }
+ return OL_SUCCESS;
+}
>From 73ed36a366dec72b63dccdc24d240e0efc0bf528 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Wed, 11 Dec 2024 12:13:29 +0000
Subject: [PATCH 02/16] Add size check
---
offload/liboffload/API/Memory.td | 6 +++++-
offload/liboffload/include/generated/OffloadAPI.h | 2 ++
offload/liboffload/include/generated/OffloadEntryPoints.inc | 4 ++++
3 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td
index 8cfaf70311e34..c15ae6f6d21ca 100644
--- a/offload/liboffload/API/Memory.td
+++ b/offload/liboffload/API/Memory.td
@@ -30,7 +30,11 @@ def : Function {
Param<"size_t", "Aligment", "alignment of the allocation in bytes", PARAM_IN>,
Param<"void**", "AllocationOut", "output for the allocated pointer", PARAM_OUT>
];
- let returns = [];
+ let returns = [
+ Return<"OL_ERRC_INVALID_SIZE", [
+ "`Size == 0`"
+ ]>
+ ];
}
def : Function {
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 81f3a8e0201ba..4c3356645e55a 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -484,6 +484,8 @@ typedef enum ol_alloc_type_t {
/// - ::OL_RESULT_SUCCESS
/// - ::OL_ERRC_UNINITIALIZED
/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_SIZE
+/// + `Size == 0`
/// - ::OL_ERRC_INVALID_NULL_HANDLE
/// + `NULL == Device`
/// - ::OL_ERRC_INVALID_NULL_POINTER
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 08060dae80f03..bcde65452b265 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -445,6 +445,10 @@ ol_impl_result_t olMemAlloc_val(ol_device_handle_t Device, ol_alloc_type_t Type,
size_t Size, size_t Aligment,
void **AllocationOut) {
if (true /*enableParameterValidation*/) {
+ if (Size == 0) {
+ return OL_ERRC_INVALID_SIZE;
+ }
+
if (NULL == Device) {
return OL_ERRC_INVALID_NULL_HANDLE;
}
>From be5c36bd2b23fc9eb7886586d8687bde4de145e0 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Mon, 6 Jan 2025 15:22:52 +0000
Subject: [PATCH 03/16] Implement minimum Offload API needed to launch a SYCL
kernel
---
offload/liboffload/API/Common.td | 20 +
offload/liboffload/API/Enqueue.td | 68 ++
offload/liboffload/API/Event.td | 41 +
offload/liboffload/API/Kernel.td | 44 +
offload/liboffload/API/OffloadAPI.td | 5 +
offload/liboffload/API/Program.td | 44 +
offload/liboffload/API/Queue.td | 52 ++
.../liboffload/include/generated/OffloadAPI.h | 656 +++++++++++++++
.../include/generated/OffloadEntryPoints.inc | 775 ++++++++++++++++++
.../include/generated/OffloadFuncs.inc | 36 +
.../generated/OffloadImplFuncDecls.inc | 54 ++
.../include/generated/OffloadPrint.hpp | 210 +++++
offload/liboffload/src/OffloadImpl.cpp | 360 ++++++++
.../common/include/GlobalHandler.h | 5 +-
offload/plugins-nextgen/cuda/src/rtl.cpp | 28 +
offload/plugins-nextgen/host/src/rtl.cpp | 4 +-
offload/unittests/OffloadAPI/CMakeLists.txt | 4 +-
.../OffloadAPI/queue/olCreateQueue.cpp | 19 +
18 files changed, 2420 insertions(+), 5 deletions(-)
create mode 100644 offload/liboffload/API/Enqueue.td
create mode 100644 offload/liboffload/API/Event.td
create mode 100644 offload/liboffload/API/Kernel.td
create mode 100644 offload/liboffload/API/Program.td
create mode 100644 offload/liboffload/API/Queue.td
create mode 100644 offload/unittests/OffloadAPI/queue/olCreateQueue.cpp
diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td
index 5b19d1d47129e..7fedb2002f157 100644
--- a/offload/liboffload/API/Common.td
+++ b/offload/liboffload/API/Common.td
@@ -62,6 +62,26 @@ def : Handle {
let desc = "Handle of context object";
}
+def : Handle {
+ let name = "ol_queue_handle_t";
+ let desc = "Handle of queue object";
+}
+
+def : Handle {
+ let name = "ol_event_handle_t";
+ let desc = "Handle of event object";
+}
+
+def : Handle {
+ let name = "ol_program_handle_t";
+ let desc = "Handle of program object";
+}
+
+def : Handle {
+ let name = "ol_kernel_handle_t";
+ let desc = "Handle of kernel object";
+}
+
def : Enum {
let name = "ol_errc_t";
let desc = "Defines Return/Error codes";
diff --git a/offload/liboffload/API/Enqueue.td b/offload/liboffload/API/Enqueue.td
new file mode 100644
index 0000000000000..621eb3a2f410e
--- /dev/null
+++ b/offload/liboffload/API/Enqueue.td
@@ -0,0 +1,68 @@
+//===-- Enqueue.td - Enqueue definitions for Offload -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to enqueueable operations
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+ let name = "olEnqueueDataWrite";
+ let desc = "Enqueue a write operation from host to device memory";
+ let details = [];
+ let params = [
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+ Param<"void*", "SrcPtr", "host pointer to copy from", PARAM_IN>,
+ Param<"void*", "DstPtr", "device pointer to copy to", PARAM_IN>,
+ Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
+ Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olEnqueueDataRead";
+ let desc = "Enqueue a read operation from device to host memory";
+ let details = [];
+ let params = [
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+ Param<"void*", "SrcPtr", "device pointer to copy from", PARAM_IN>,
+ Param<"void*", "DstPtr", "host pointer to copy to", PARAM_IN>,
+ Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
+ Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olEnqueueDataCopy";
+ let desc = "Enqueue a copy operation between device allocations";
+ let details = [];
+ let params = [
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+ Param<"void*", "SrcPtr", "device pointer to copy from", PARAM_IN>,
+ Param<"void*", "DstPtr", "device pointer to copy to", PARAM_IN>,
+ Param<"ol_device_handle_t", "DstDevice", "device that the destination pointer is resident on", PARAM_IN>,
+ Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
+ Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+ ];
+ let returns = [];
+}
+
+
+def : Function {
+ let name = "olEnqueueKernelLaunch";
+ let desc = "Enqueue a kernel launch with the specified size and parameters";
+ let details = [];
+ let params = [
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+ Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
+ Param<"const size_t*", "GlobalWorkSize", "an array of size 3 representing the global work size", PARAM_IN>,
+ Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+ ];
+ let returns = [];
+}
diff --git a/offload/liboffload/API/Event.td b/offload/liboffload/API/Event.td
new file mode 100644
index 0000000000000..db90a7c8e2be4
--- /dev/null
+++ b/offload/liboffload/API/Event.td
@@ -0,0 +1,41 @@
+//===-- Event.td - Event definitions for Offload -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to the event handle
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+ let name = "olRetainEvent";
+ let desc = "Increment the reference count of the given event";
+ let details = [];
+ let params = [
+ Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olReleaseEvent";
+ let desc = "Decrement the reference count of the given event";
+ let details = [];
+ let params = [
+ Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olWaitEvent";
+ let desc = "Wait for the event to be complete";
+ let details = [];
+ let params = [
+ Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
+ ];
+ let returns = [];
+}
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
new file mode 100644
index 0000000000000..936372c18ca37
--- /dev/null
+++ b/offload/liboffload/API/Kernel.td
@@ -0,0 +1,44 @@
+def : Function {
+ let name = "olCreateKernel";
+ let desc = "Create a kernel from the named entry point in the given program";
+ let details = [];
+ let params = [
+ Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>,
+ Param<"const char*", "KernelName", "name of the kernel entry point in the program", PARAM_IN>,
+ Param<"ol_kernel_handle_t*", "Kernel", "output pointer for the created kernel", PARAM_OUT>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olRetainKernel";
+ let desc = "Increment the reference count of the given kernel";
+ let details = [];
+ let params = [
+ Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olReleaseKernel";
+ let desc = "Decrement the reference count of the given kernel";
+ let details = [];
+ let params = [
+ Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olSetKernelArgValue";
+ let desc = "Set the value of a single kernel argument";
+ let details = [];
+ let params = [
+ Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
+ Param<"uint32_t", "Index", "index of the argument", PARAM_IN>,
+ Param<"size_t", "Size", "size of the argument data", PARAM_IN>,
+ Param<"void*", "ArgData", "pointer to the argument data", PARAM_IN>
+ ];
+ let returns = [];
+}
diff --git a/offload/liboffload/API/OffloadAPI.td b/offload/liboffload/API/OffloadAPI.td
index a609cc7ac80b4..f2822b93e6bf8 100644
--- a/offload/liboffload/API/OffloadAPI.td
+++ b/offload/liboffload/API/OffloadAPI.td
@@ -14,3 +14,8 @@ include "Common.td"
include "Platform.td"
include "Device.td"
include "Memory.td"
+include "Queue.td"
+include "Event.td"
+include "Enqueue.td"
+include "Program.td"
+include "Kernel.td"
diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
new file mode 100644
index 0000000000000..684a6581320f8
--- /dev/null
+++ b/offload/liboffload/API/Program.td
@@ -0,0 +1,44 @@
+//===-- Program.td - Program definitions for Offload -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to the program handle
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+ let name = "olCreateProgram";
+ let desc = "Create a program for the device from the given binary data";
+ let details = [];
+ let params = [
+ Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
+ Param<"void*", "ProgData", "pointer to the program binary data", PARAM_IN>,
+ Param<"size_t", "ProgDataSize", "size of the program binary in bytes", PARAM_IN>,
+ // NOTE(review): the output parameter is named "Queue" although it returns a
+ // program handle; presumably it should be "Program" - confirm and rename.
+ Param<"ol_program_handle_t*", "Queue", "output pointer for the created program", PARAM_OUT>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olRetainProgram";
+ let desc = "Increment the reference count of the given program";
+ let details = [];
+ let params = [
+ Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olReleaseProgram";
+ let desc = "Decrement the reference count of the given program";
+ let details = [];
+ let params = [
+ Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>
+ ];
+ let returns = [];
+}
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
new file mode 100644
index 0000000000000..5629fa40d56d5
--- /dev/null
+++ b/offload/liboffload/API/Queue.td
@@ -0,0 +1,52 @@
+//===-- Queue.td - Queue definitions for Offload -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to the queue handle
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+ let name = "olCreateQueue";
+ let desc = "Create a queue for the given device";
+ let details = [];
+ let params = [
+ Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
+ Param<"ol_queue_handle_t*", "Queue", "output pointer for the created queue", PARAM_OUT>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olRetainQueue";
+ let desc = "Increment the reference count of the given queue";
+ let details = [];
+ let params = [
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olReleaseQueue";
+ let desc = "Decrement the reference count of the given queue";
+ let details = [];
+ let params = [
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+ ];
+ let returns = [];
+}
+
+def : Function {
+ let name = "olFinishQueue";
+ let desc = "Wait for the enqueued work on a queue to complete";
+ let details = [];
+ let params = [
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+ ];
+ let returns = [];
+}
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 4c3356645e55a..2384de19ae72e 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -85,6 +85,22 @@ typedef struct ol_device_handle_t_ *ol_device_handle_t;
/// @brief Handle of context object
typedef struct ol_context_handle_t_ *ol_context_handle_t;
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of queue object
+typedef struct ol_queue_handle_t_ *ol_queue_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of event object
+typedef struct ol_event_handle_t_ *ol_event_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of program object
+typedef struct ol_program_handle_t_ *ol_program_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of kernel object
+typedef struct ol_kernel_handle_t_ *ol_kernel_handle_t;
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Defines Return/Error codes
typedef enum ol_errc_t {
@@ -523,6 +539,359 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemFree(
// [in] address of the allocation to free
void *Address);
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Create a queue for the given device
+///
+/// @details
+/// - The created queue handle is returned through `Queue`.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Device`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == Queue`
+OL_APIEXPORT ol_result_t OL_APICALL olCreateQueue(
+ // [in] handle of the device
+ ol_device_handle_t Device,
+ // [out] output pointer for the created queue
+ ol_queue_handle_t *Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the reference count of the given queue
+///
+/// @details
+/// - NOTE(review): reference-counting semantics are assumed by analogy with
+///   olRetainEvent; confirm against the implementation.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Queue`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+///   (no condition listed: this function takes no pointer parameters)
+OL_APIEXPORT ol_result_t OL_APICALL olRetainQueue(
+ // [in] handle of the queue
+ ol_queue_handle_t Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the reference count of the given queue
+///
+/// @details
+/// - NOTE(review): reference-counting semantics are assumed by analogy with
+///   olReleaseEvent; confirm against the implementation.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Queue`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+///   (no condition listed: this function takes no pointer parameters)
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueue(
+ // [in] handle of the queue
+ ol_queue_handle_t Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Wait for the enqueued work on a queue to complete
+///
+/// @details
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Queue`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+///   (no condition listed: this function takes no pointer parameters)
+OL_APIEXPORT ol_result_t OL_APICALL olFinishQueue(
+ // [in] handle of the queue
+ ol_queue_handle_t Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the reference count of the given event
+///
+/// @details
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Event`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+///   (no condition listed: this function takes no pointer parameters)
+OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(
+ // [in] handle of the event
+ ol_event_handle_t Event);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the reference count of the given event
+///
+/// @details
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Event`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+///   (no condition listed: this function takes no pointer parameters)
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseEvent(
+ // [in] handle of the event
+ ol_event_handle_t Event);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Wait for the event to be complete
+///
+/// @details
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Event`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+///   (no condition listed: this function takes no pointer parameters)
+OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(
+ // [in] handle of the event
+ ol_event_handle_t Event);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a write operation from host to device memory
+///
+/// @details
+/// - `EventOut` is optional; no NULL condition is listed for it below.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Queue`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == SrcPtr`
+/// + `NULL == DstPtr`
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataWrite(
+ // [in] handle of the queue
+ ol_queue_handle_t Queue,
+ // [in] host pointer to copy from
+ void *SrcPtr,
+ // [in] device pointer to copy to
+ void *DstPtr,
+ // [in] size in bytes of data to copy
+ size_t Size,
+ // [out][optional] optional recorded event for the enqueued operation
+ ol_event_handle_t *EventOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a read operation from device to host memory
+///
+/// @details
+/// - `EventOut` is optional; no NULL condition is listed for it below.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Queue`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == SrcPtr`
+/// + `NULL == DstPtr`
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataRead(
+ // [in] handle of the queue
+ ol_queue_handle_t Queue,
+ // [in] device pointer to copy from
+ void *SrcPtr,
+ // [in] host pointer to copy to
+ void *DstPtr,
+ // [in] size in bytes of data to copy
+ size_t Size,
+ // [out][optional] optional recorded event for the enqueued operation
+ ol_event_handle_t *EventOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a write operation between device allocations
+///
+/// @details
+/// - `DstDevice` names the device the destination pointer is resident on.
+/// - `EventOut` is optional; no NULL condition is listed for it below.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Queue`
+/// + `NULL == DstDevice`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == SrcPtr`
+/// + `NULL == DstPtr`
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopy(
+ // [in] handle of the queue
+ ol_queue_handle_t Queue,
+ // [in] device pointer to copy from
+ void *SrcPtr,
+ // [in] device pointer to copy to
+ void *DstPtr,
+ // [in] device that the destination pointer is resident on
+ ol_device_handle_t DstDevice,
+ // [in] size in bytes of data to copy
+ size_t Size,
+ // [out][optional] optional recorded event for the enqueued operation
+ ol_event_handle_t *EventOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a kernel launch with the specified size and parameters
+///
+/// @details
+/// - `GlobalWorkSize` is documented as an array of size 3.
+/// - `EventOut` is optional; no NULL condition is listed for it below.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Queue`
+/// + `NULL == Kernel`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == GlobalWorkSize`
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
+ // [in] handle of the queue
+ ol_queue_handle_t Queue,
+ // [in] handle of the kernel
+ ol_kernel_handle_t Kernel,
+ // [in] an array of size 3 representing the global work size
+ const size_t *GlobalWorkSize,
+ // [out][optional] optional recorded event for the enqueued operation
+ ol_event_handle_t *EventOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Create a program for the given device from program binary data
+///
+/// @details
+/// - NOTE(review): the output parameter is named `Queue` although it receives
+///   a program handle; this looks like a copy-paste from olCreateQueue.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Device`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == ProgData`
+/// + `NULL == Queue`
+OL_APIEXPORT ol_result_t OL_APICALL olCreateProgram(
+ // [in] handle of the device
+ ol_device_handle_t Device,
+ // [in] pointer to the program binary data
+ void *ProgData,
+ // [in] size of the program binary in bytes
+ size_t ProgDataSize,
+ // [out] output pointer for the created program
+ ol_program_handle_t *Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the reference count of the given program
+///
+/// @details
+/// - NOTE(review): reference-counting semantics are assumed by analogy with
+///   olRetainEvent; confirm against the implementation.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Program`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+///   (no condition listed: this function takes no pointer parameters)
+OL_APIEXPORT ol_result_t OL_APICALL olRetainProgram(
+ // [in] handle of the program
+ ol_program_handle_t Program);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the reference count of the given program
+///
+/// @details
+/// - NOTE(review): reference-counting semantics are assumed by analogy with
+///   olReleaseEvent; confirm against the implementation.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Program`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+///   (no condition listed: this function takes no pointer parameters)
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseProgram(
+ // [in] handle of the program
+ ol_program_handle_t Program);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Create a kernel from the named entry point in the given program
+///
+/// @details
+/// - The created kernel handle is returned through `Kernel`.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Program`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == KernelName`
+/// + `NULL == Kernel`
+OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(
+ // [in] handle of the program
+ ol_program_handle_t Program,
+ // [in] name of the kernel entry point in the program
+ const char *KernelName,
+ // [out] output pointer for the created kernel
+ ol_kernel_handle_t *Kernel);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the reference count of the given kernel
+///
+/// @details
+/// - NOTE(review): reference-counting semantics are assumed by analogy with
+///   olRetainEvent; confirm against the implementation.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Kernel`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+///   (no condition listed: this function takes no pointer parameters)
+OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(
+ // [in] handle of the kernel
+ ol_kernel_handle_t Kernel);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the reference count of the given kernel
+///
+/// @details
+/// - NOTE(review): reference-counting semantics are assumed by analogy with
+///   olReleaseEvent; confirm against the implementation.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Kernel`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+///   (no condition listed: this function takes no pointer parameters)
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernel(
+ // [in] handle of the kernel
+ ol_kernel_handle_t Kernel);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Set the value of the kernel argument at the given index
+///
+/// @details
+/// - `ArgData` points to `Size` bytes of argument data, per the parameter
+///   descriptions below.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Kernel`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == ArgData`
+OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgValue(
+ // [in] handle of the kernel
+ ol_kernel_handle_t Kernel,
+ // [in] index of the argument
+ uint32_t Index,
+ // [in] size of the argument data
+ size_t Size,
+ // [in] pointer to the argument data
+ void *ArgData);
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for olGetPlatform
/// @details Each entry is a pointer to the parameter passed to the function;
@@ -613,6 +982,157 @@ typedef struct ol_mem_free_params_t {
void **pAddress;
} ol_mem_free_params_t;
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olCreateQueue
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_create_queue_params_t {
+ ol_device_handle_t *pDevice; ///< address of the `Device` argument
+ ol_queue_handle_t **pQueue; ///< address of the `Queue` output-pointer argument
+} ol_create_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainQueue
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_retain_queue_params_t {
+ ol_queue_handle_t *pQueue; ///< address of the `Queue` argument
+} ol_retain_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseQueue
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_release_queue_params_t {
+ ol_queue_handle_t *pQueue; ///< address of the `Queue` argument
+} ol_release_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olFinishQueue
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_finish_queue_params_t {
+ ol_queue_handle_t *pQueue; ///< address of the `Queue` argument
+} ol_finish_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainEvent
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_retain_event_params_t {
+ ol_event_handle_t *pEvent; ///< address of the `Event` argument
+} ol_retain_event_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseEvent
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_release_event_params_t {
+ ol_event_handle_t *pEvent; ///< address of the `Event` argument
+} ol_release_event_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olWaitEvent
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_wait_event_params_t {
+ ol_event_handle_t *pEvent; ///< address of the `Event` argument
+} ol_wait_event_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olEnqueueDataWrite
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_enqueue_data_write_params_t {
+ ol_queue_handle_t *pQueue; ///< address of the `Queue` argument
+ void **pSrcPtr; ///< address of the `SrcPtr` argument
+ void **pDstPtr; ///< address of the `DstPtr` argument
+ size_t *pSize; ///< address of the `Size` argument
+ ol_event_handle_t **pEventOut; ///< address of the `EventOut` argument
+} ol_enqueue_data_write_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olEnqueueDataRead
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_enqueue_data_read_params_t {
+ ol_queue_handle_t *pQueue; ///< address of the `Queue` argument
+ void **pSrcPtr; ///< address of the `SrcPtr` argument
+ void **pDstPtr; ///< address of the `DstPtr` argument
+ size_t *pSize; ///< address of the `Size` argument
+ ol_event_handle_t **pEventOut; ///< address of the `EventOut` argument
+} ol_enqueue_data_read_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olEnqueueDataCopy
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_enqueue_data_copy_params_t {
+ ol_queue_handle_t *pQueue; ///< address of the `Queue` argument
+ void **pSrcPtr; ///< address of the `SrcPtr` argument
+ void **pDstPtr; ///< address of the `DstPtr` argument
+ ol_device_handle_t *pDstDevice; ///< address of the `DstDevice` argument
+ size_t *pSize; ///< address of the `Size` argument
+ ol_event_handle_t **pEventOut; ///< address of the `EventOut` argument
+} ol_enqueue_data_copy_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olEnqueueKernelLaunch
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_enqueue_kernel_launch_params_t {
+ ol_queue_handle_t *pQueue; ///< address of the `Queue` argument
+ ol_kernel_handle_t *pKernel; ///< address of the `Kernel` argument
+ const size_t **pGlobalWorkSize; ///< address of the `GlobalWorkSize` argument
+ ol_event_handle_t **pEventOut; ///< address of the `EventOut` argument
+} ol_enqueue_kernel_launch_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olCreateProgram
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_create_program_params_t {
+ ol_device_handle_t *pDevice; ///< address of the `Device` argument
+ void **pProgData; ///< address of the `ProgData` argument
+ size_t *pProgDataSize; ///< address of the `ProgDataSize` argument
+ // NOTE(review): named after olCreateProgram's `Queue` parameter, but the
+ // type shows it refers to a program-handle output pointer.
+ ol_program_handle_t **pQueue;
+} ol_create_program_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainProgram
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_retain_program_params_t {
+ ol_program_handle_t *pProgram; ///< address of the `Program` argument
+} ol_retain_program_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseProgram
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_release_program_params_t {
+ ol_program_handle_t *pProgram; ///< address of the `Program` argument
+} ol_release_program_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olCreateKernel
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_create_kernel_params_t {
+ ol_program_handle_t *pProgram; ///< address of the `Program` argument
+ const char **pKernelName; ///< address of the `KernelName` argument
+ ol_kernel_handle_t **pKernel; ///< address of the `Kernel` output-pointer argument
+} ol_create_kernel_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainKernel
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_retain_kernel_params_t {
+ ol_kernel_handle_t *pKernel; ///< address of the `Kernel` argument
+} ol_retain_kernel_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseKernel
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_release_kernel_params_t {
+ ol_kernel_handle_t *pKernel; ///< address of the `Kernel` argument
+} ol_release_kernel_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olSetKernelArgValue
+/// @details Each entry is a pointer to the corresponding parameter passed to
+/// the function.
+typedef struct ol_set_kernel_arg_value_params_t {
+ ol_kernel_handle_t *pKernel; ///< address of the `Kernel` argument
+ uint32_t *pIndex; ///< address of the `Index` argument
+ size_t *pSize; ///< address of the `Size` argument
+ void **pArgData; ///< address of the `ArgData` argument
+} ol_set_kernel_arg_value_params_t;
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Variant of olInit that also sets source code location information
/// @details See also ::olInit
@@ -702,6 +1222,142 @@ OL_APIEXPORT ol_result_t OL_APICALL
olMemFreeWithCodeLoc(ol_device_handle_t Device, ol_alloc_type_t Type,
void *Address, ol_code_location_t *CodeLocation);
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olCreateQueue that also sets source code location
+/// information
+/// @details See also ::olCreateQueue. Takes the same arguments plus a trailing
+/// `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL
+olCreateQueueWithCodeLoc(ol_device_handle_t Device, ol_queue_handle_t *Queue,
+ ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainQueue that also sets source code location
+/// information
+/// @details See also ::olRetainQueue. Takes the same arguments plus a trailing
+/// `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olRetainQueueWithCodeLoc(
+ ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseQueue that also sets source code location
+/// information
+/// @details See also ::olReleaseQueue. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueueWithCodeLoc(
+ ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olFinishQueue that also sets source code location
+/// information
+/// @details See also ::olFinishQueue. Takes the same arguments plus a trailing
+/// `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olFinishQueueWithCodeLoc(
+ ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainEvent that also sets source code location
+/// information
+/// @details See also ::olRetainEvent. Takes the same arguments plus a trailing
+/// `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olRetainEventWithCodeLoc(
+ ol_event_handle_t Event, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseEvent that also sets source code location
+/// information
+/// @details See also ::olReleaseEvent. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseEventWithCodeLoc(
+ ol_event_handle_t Event, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olWaitEvent that also sets source code location
+/// information
+/// @details See also ::olWaitEvent. Takes the same arguments plus a trailing
+/// `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olWaitEventWithCodeLoc(
+ ol_event_handle_t Event, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olEnqueueDataWrite that also sets source code location
+/// information
+/// @details See also ::olEnqueueDataWrite. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataWriteWithCodeLoc(
+ ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr, size_t Size,
+ ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olEnqueueDataRead that also sets source code location
+/// information
+/// @details See also ::olEnqueueDataRead. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataReadWithCodeLoc(
+ ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr, size_t Size,
+ ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olEnqueueDataCopy that also sets source code location
+/// information
+/// @details See also ::olEnqueueDataCopy. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopyWithCodeLoc(
+ ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
+ ol_device_handle_t DstDevice, size_t Size, ol_event_handle_t *EventOut,
+ ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olEnqueueKernelLaunch that also sets source code location
+/// information
+/// @details See also ::olEnqueueKernelLaunch. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunchWithCodeLoc(
+ ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+ const size_t *GlobalWorkSize, ol_event_handle_t *EventOut,
+ ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olCreateProgram that also sets source code location
+/// information
+/// @details See also ::olCreateProgram. Takes the same arguments plus a
+/// trailing `CodeLocation`. NOTE(review): as in olCreateProgram, the program
+/// output pointer is named `Queue`.
+OL_APIEXPORT ol_result_t OL_APICALL olCreateProgramWithCodeLoc(
+ ol_device_handle_t Device, void *ProgData, size_t ProgDataSize,
+ ol_program_handle_t *Queue, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainProgram that also sets source code location
+/// information
+/// @details See also ::olRetainProgram. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olRetainProgramWithCodeLoc(
+ ol_program_handle_t Program, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseProgram that also sets source code location
+/// information
+/// @details See also ::olReleaseProgram. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseProgramWithCodeLoc(
+ ol_program_handle_t Program, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olCreateKernel that also sets source code location
+/// information
+/// @details See also ::olCreateKernel. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olCreateKernelWithCodeLoc(
+ ol_program_handle_t Program, const char *KernelName,
+ ol_kernel_handle_t *Kernel, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainKernel that also sets source code location
+/// information
+/// @details See also ::olRetainKernel. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olRetainKernelWithCodeLoc(
+ ol_kernel_handle_t Kernel, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseKernel that also sets source code location
+/// information
+/// @details See also ::olReleaseKernel. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernelWithCodeLoc(
+ ol_kernel_handle_t Kernel, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olSetKernelArgValue that also sets source code location
+/// information
+/// @details See also ::olSetKernelArgValue. Takes the same arguments plus a
+/// trailing `CodeLocation`.
+OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgValueWithCodeLoc(
+ ol_kernel_handle_t Kernel, uint32_t Index, size_t Size, void *ArgData,
+ ol_code_location_t *CodeLocation);
+
#if defined(__cplusplus)
} // extern "C"
#endif
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index bcde65452b265..0ae3c36f95827 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -536,3 +536,778 @@ ol_result_t olMemFreeWithCodeLoc(ol_device_handle_t Device,
currentCodeLocation() = nullptr;
return Result;
}
+
+///////////////////////////////////////////////////////////////////////////////
+// Validation layer for olCreateQueue: rejects a NULL device handle or a NULL
+// output pointer, otherwise forwards to olCreateQueue_impl.
+ol_impl_result_t olCreateQueue_val(ol_device_handle_t Device,
+                                   ol_queue_handle_t *Queue) {
+  const bool ValidateParams = true; /*enableParameterValidation*/
+  if (ValidateParams && Device == nullptr)
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  if (ValidateParams && Queue == nullptr)
+    return OL_ERRC_INVALID_NULL_POINTER;
+
+  return olCreateQueue_impl(Device, Queue);
+}
+// Public entry point for olCreateQueue. When offloadConfig().TracingEnabled
+// is set, the call name, parameters, result and any error details are written
+// to std::cout around the validated call.
+OL_APIEXPORT ol_result_t OL_APICALL olCreateQueue(ol_device_handle_t Device,
+                                                  ol_queue_handle_t *Queue) {
+  if (offloadConfig().TracingEnabled)
+    std::cout << "---> olCreateQueue";
+
+  ol_result_t Status = olCreateQueue_val(Device, Queue);
+
+  // The tracing flag is read again after the call, as before it.
+  if (!offloadConfig().TracingEnabled)
+    return Status;
+
+  ol_create_queue_params_t Params = {&Device, &Queue};
+  std::cout << "(" << &Params << ")";
+  std::cout << "-> " << Status << "\n";
+  if (Status && Status->Details)
+    std::cout << " *Error Details* " << Status->Details << " \n";
+  return Status;
+}
+// Publishes CodeLocation through currentCodeLocation() for the duration of the
+// olCreateQueue call, then resets it to nullptr.
+ol_result_t olCreateQueueWithCodeLoc(ol_device_handle_t Device,
+                                     ol_queue_handle_t *Queue,
+                                     ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olCreateQueue(Device, Queue);
+  currentCodeLocation() = nullptr;
+
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Validation layer for olRetainQueue: rejects a NULL queue handle, otherwise
+// forwards to olRetainQueue_impl.
+ol_impl_result_t olRetainQueue_val(ol_queue_handle_t Queue) {
+  const bool ValidateParams = true; /*enableParameterValidation*/
+  if (ValidateParams && Queue == nullptr)
+    return OL_ERRC_INVALID_NULL_HANDLE;
+
+  return olRetainQueue_impl(Queue);
+}
+// Public entry point for olRetainQueue. When offloadConfig().TracingEnabled
+// is set, the call name, parameters, result and any error details are written
+// to std::cout around the validated call.
+OL_APIEXPORT ol_result_t OL_APICALL olRetainQueue(ol_queue_handle_t Queue) {
+  if (offloadConfig().TracingEnabled)
+    std::cout << "---> olRetainQueue";
+
+  ol_result_t Status = olRetainQueue_val(Queue);
+
+  // The tracing flag is read again after the call, as before it.
+  if (!offloadConfig().TracingEnabled)
+    return Status;
+
+  ol_retain_queue_params_t Params = {&Queue};
+  std::cout << "(" << &Params << ")";
+  std::cout << "-> " << Status << "\n";
+  if (Status && Status->Details)
+    std::cout << " *Error Details* " << Status->Details << " \n";
+  return Status;
+}
+// Publishes CodeLocation through currentCodeLocation() for the duration of the
+// olRetainQueue call, then resets it to nullptr.
+ol_result_t olRetainQueueWithCodeLoc(ol_queue_handle_t Queue,
+                                     ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olRetainQueue(Queue);
+  currentCodeLocation() = nullptr;
+
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Validation layer for olReleaseQueue: rejects a NULL queue handle, otherwise
+// forwards to olReleaseQueue_impl.
+ol_impl_result_t olReleaseQueue_val(ol_queue_handle_t Queue) {
+  const bool ValidateParams = true; /*enableParameterValidation*/
+  if (ValidateParams && Queue == nullptr)
+    return OL_ERRC_INVALID_NULL_HANDLE;
+
+  return olReleaseQueue_impl(Queue);
+}
+// Public entry point for olReleaseQueue. When offloadConfig().TracingEnabled
+// is set, the call name, parameters, result and any error details are written
+// to std::cout around the validated call.
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueue(ol_queue_handle_t Queue) {
+  if (offloadConfig().TracingEnabled)
+    std::cout << "---> olReleaseQueue";
+
+  ol_result_t Status = olReleaseQueue_val(Queue);
+
+  // The tracing flag is read again after the call, as before it.
+  if (!offloadConfig().TracingEnabled)
+    return Status;
+
+  ol_release_queue_params_t Params = {&Queue};
+  std::cout << "(" << &Params << ")";
+  std::cout << "-> " << Status << "\n";
+  if (Status && Status->Details)
+    std::cout << " *Error Details* " << Status->Details << " \n";
+  return Status;
+}
+// Publishes CodeLocation through currentCodeLocation() for the duration of the
+// olReleaseQueue call, then resets it to nullptr.
+ol_result_t olReleaseQueueWithCodeLoc(ol_queue_handle_t Queue,
+                                      ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olReleaseQueue(Queue);
+  currentCodeLocation() = nullptr;
+
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Validation layer for olFinishQueue: rejects a NULL queue handle, otherwise
+// forwards to olFinishQueue_impl.
+ol_impl_result_t olFinishQueue_val(ol_queue_handle_t Queue) {
+  const bool ValidateParams = true; /*enableParameterValidation*/
+  if (ValidateParams && Queue == nullptr)
+    return OL_ERRC_INVALID_NULL_HANDLE;
+
+  return olFinishQueue_impl(Queue);
+}
+// Public entry point for olFinishQueue. When offloadConfig().TracingEnabled
+// is set, the call name, parameters, result and any error details are written
+// to std::cout around the validated call.
+OL_APIEXPORT ol_result_t OL_APICALL olFinishQueue(ol_queue_handle_t Queue) {
+  if (offloadConfig().TracingEnabled)
+    std::cout << "---> olFinishQueue";
+
+  ol_result_t Status = olFinishQueue_val(Queue);
+
+  // The tracing flag is read again after the call, as before it.
+  if (!offloadConfig().TracingEnabled)
+    return Status;
+
+  ol_finish_queue_params_t Params = {&Queue};
+  std::cout << "(" << &Params << ")";
+  std::cout << "-> " << Status << "\n";
+  if (Status && Status->Details)
+    std::cout << " *Error Details* " << Status->Details << " \n";
+  return Status;
+}
+// Publishes CodeLocation through currentCodeLocation() for the duration of the
+// olFinishQueue call, then resets it to nullptr.
+ol_result_t olFinishQueueWithCodeLoc(ol_queue_handle_t Queue,
+                                     ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olFinishQueue(Queue);
+  currentCodeLocation() = nullptr;
+
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Validation layer for olRetainEvent: rejects a NULL event handle, otherwise
+// forwards to olRetainEvent_impl.
+ol_impl_result_t olRetainEvent_val(ol_event_handle_t Event) {
+  const bool ValidateParams = true; /*enableParameterValidation*/
+  if (ValidateParams && Event == nullptr)
+    return OL_ERRC_INVALID_NULL_HANDLE;
+
+  return olRetainEvent_impl(Event);
+}
+// Public entry point for olRetainEvent. When offloadConfig().TracingEnabled
+// is set, the call name, parameters, result and any error details are written
+// to std::cout around the validated call.
+OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(ol_event_handle_t Event) {
+  if (offloadConfig().TracingEnabled)
+    std::cout << "---> olRetainEvent";
+
+  ol_result_t Status = olRetainEvent_val(Event);
+
+  // The tracing flag is read again after the call, as before it.
+  if (!offloadConfig().TracingEnabled)
+    return Status;
+
+  ol_retain_event_params_t Params = {&Event};
+  std::cout << "(" << &Params << ")";
+  std::cout << "-> " << Status << "\n";
+  if (Status && Status->Details)
+    std::cout << " *Error Details* " << Status->Details << " \n";
+  return Status;
+}
+// Publishes CodeLocation through currentCodeLocation() for the duration of the
+// olRetainEvent call, then resets it to nullptr.
+ol_result_t olRetainEventWithCodeLoc(ol_event_handle_t Event,
+                                     ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olRetainEvent(Event);
+  currentCodeLocation() = nullptr;
+
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Validation layer for olReleaseEvent: rejects a NULL event handle, otherwise
+// forwards to olReleaseEvent_impl.
+ol_impl_result_t olReleaseEvent_val(ol_event_handle_t Event) {
+  const bool ValidateParams = true; /*enableParameterValidation*/
+  if (ValidateParams && Event == nullptr)
+    return OL_ERRC_INVALID_NULL_HANDLE;
+
+  return olReleaseEvent_impl(Event);
+}
+// Public entry point for olReleaseEvent. When offloadConfig().TracingEnabled
+// is set, the call name, parameters, result and any error details are written
+// to std::cout around the validated call.
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseEvent(ol_event_handle_t Event) {
+  if (offloadConfig().TracingEnabled)
+    std::cout << "---> olReleaseEvent";
+
+  ol_result_t Status = olReleaseEvent_val(Event);
+
+  // The tracing flag is read again after the call, as before it.
+  if (!offloadConfig().TracingEnabled)
+    return Status;
+
+  ol_release_event_params_t Params = {&Event};
+  std::cout << "(" << &Params << ")";
+  std::cout << "-> " << Status << "\n";
+  if (Status && Status->Details)
+    std::cout << " *Error Details* " << Status->Details << " \n";
+  return Status;
+}
+// Publishes CodeLocation through currentCodeLocation() for the duration of the
+// olReleaseEvent call, then resets it to nullptr.
+ol_result_t olReleaseEventWithCodeLoc(ol_event_handle_t Event,
+                                      ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olReleaseEvent(Event);
+  currentCodeLocation() = nullptr;
+
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Validation layer for olWaitEvent: rejects a NULL event handle, otherwise
+// forwards to olWaitEvent_impl.
+ol_impl_result_t olWaitEvent_val(ol_event_handle_t Event) {
+  const bool ValidateParams = true; /*enableParameterValidation*/
+  if (ValidateParams && Event == nullptr)
+    return OL_ERRC_INVALID_NULL_HANDLE;
+
+  return olWaitEvent_impl(Event);
+}
+// Public entry point for olWaitEvent. When offloadConfig().TracingEnabled is
+// set, the call name, parameters, result and any error details are written to
+// std::cout around the validated call.
+OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(ol_event_handle_t Event) {
+  if (offloadConfig().TracingEnabled)
+    std::cout << "---> olWaitEvent";
+
+  ol_result_t Status = olWaitEvent_val(Event);
+
+  // The tracing flag is read again after the call, as before it.
+  if (!offloadConfig().TracingEnabled)
+    return Status;
+
+  ol_wait_event_params_t Params = {&Event};
+  std::cout << "(" << &Params << ")";
+  std::cout << "-> " << Status << "\n";
+  if (Status && Status->Details)
+    std::cout << " *Error Details* " << Status->Details << " \n";
+  return Status;
+}
+ol_result_t olWaitEventWithCodeLoc(ol_event_handle_t Event,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olWaitEvent(Event);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olEnqueueDataWrite_val(ol_queue_handle_t Queue, void *SrcPtr,
+ void *DstPtr, size_t Size,
+ ol_event_handle_t *EventOut) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Queue) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == SrcPtr) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+
+ if (NULL == DstPtr) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olEnqueueDataWrite_impl(Queue, SrcPtr, DstPtr, Size, EventOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olEnqueueDataWrite(ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
+ size_t Size, ol_event_handle_t *EventOut) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olEnqueueDataWrite";
+ }
+
+ ol_result_t Result =
+ olEnqueueDataWrite_val(Queue, SrcPtr, DstPtr, Size, EventOut);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_enqueue_data_write_params_t Params = {&Queue, &SrcPtr, &DstPtr, &Size,
+ &EventOut};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olEnqueueDataWriteWithCodeLoc(ol_queue_handle_t Queue, void *SrcPtr,
+ void *DstPtr, size_t Size,
+ ol_event_handle_t *EventOut,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result =
+ olEnqueueDataWrite(Queue, SrcPtr, DstPtr, Size, EventOut);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olEnqueueDataRead_val(ol_queue_handle_t Queue, void *SrcPtr,
+ void *DstPtr, size_t Size,
+ ol_event_handle_t *EventOut) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Queue) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == SrcPtr) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+
+ if (NULL == DstPtr) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olEnqueueDataRead_impl(Queue, SrcPtr, DstPtr, Size, EventOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olEnqueueDataRead(ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
+ size_t Size, ol_event_handle_t *EventOut) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olEnqueueDataRead";
+ }
+
+ ol_result_t Result =
+ olEnqueueDataRead_val(Queue, SrcPtr, DstPtr, Size, EventOut);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_enqueue_data_read_params_t Params = {&Queue, &SrcPtr, &DstPtr, &Size,
+ &EventOut};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olEnqueueDataReadWithCodeLoc(ol_queue_handle_t Queue, void *SrcPtr,
+ void *DstPtr, size_t Size,
+ ol_event_handle_t *EventOut,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olEnqueueDataRead(Queue, SrcPtr, DstPtr, Size, EventOut);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olEnqueueDataCopy_val(ol_queue_handle_t Queue, void *SrcPtr,
+ void *DstPtr,
+ ol_device_handle_t DstDevice,
+ size_t Size,
+ ol_event_handle_t *EventOut) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Queue) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == DstDevice) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == SrcPtr) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+
+ if (NULL == DstPtr) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olEnqueueDataCopy_impl(Queue, SrcPtr, DstPtr, DstDevice, Size,
+ EventOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopy(
+ ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
+ ol_device_handle_t DstDevice, size_t Size, ol_event_handle_t *EventOut) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olEnqueueDataCopy";
+ }
+
+ ol_result_t Result =
+ olEnqueueDataCopy_val(Queue, SrcPtr, DstPtr, DstDevice, Size, EventOut);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_enqueue_data_copy_params_t Params = {&Queue, &SrcPtr, &DstPtr,
+ &DstDevice, &Size, &EventOut};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olEnqueueDataCopyWithCodeLoc(ol_queue_handle_t Queue, void *SrcPtr,
+ void *DstPtr,
+ ol_device_handle_t DstDevice,
+ size_t Size,
+ ol_event_handle_t *EventOut,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result =
+ olEnqueueDataCopy(Queue, SrcPtr, DstPtr, DstDevice, Size, EventOut);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olEnqueueKernelLaunch_val(ol_queue_handle_t Queue,
+ ol_kernel_handle_t Kernel,
+ const size_t *GlobalWorkSize,
+ ol_event_handle_t *EventOut) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Queue) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == Kernel) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == GlobalWorkSize) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olEnqueueKernelLaunch_impl(Queue, Kernel, GlobalWorkSize, EventOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
+ ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+ const size_t *GlobalWorkSize, ol_event_handle_t *EventOut) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olEnqueueKernelLaunch";
+ }
+
+ ol_result_t Result =
+ olEnqueueKernelLaunch_val(Queue, Kernel, GlobalWorkSize, EventOut);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_enqueue_kernel_launch_params_t Params = {&Queue, &Kernel,
+ &GlobalWorkSize, &EventOut};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olEnqueueKernelLaunchWithCodeLoc(ol_queue_handle_t Queue,
+ ol_kernel_handle_t Kernel,
+ const size_t *GlobalWorkSize,
+ ol_event_handle_t *EventOut,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result =
+ olEnqueueKernelLaunch(Queue, Kernel, GlobalWorkSize, EventOut);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olCreateProgram_val(ol_device_handle_t Device, void *ProgData,
+ size_t ProgDataSize,
+ ol_program_handle_t *Queue) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Device) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == ProgData) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+
+ if (NULL == Queue) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olCreateProgram_impl(Device, ProgData, ProgDataSize, Queue);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olCreateProgram(ol_device_handle_t Device, void *ProgData, size_t ProgDataSize,
+ ol_program_handle_t *Queue) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olCreateProgram";
+ }
+
+ ol_result_t Result =
+ olCreateProgram_val(Device, ProgData, ProgDataSize, Queue);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_create_program_params_t Params = {&Device, &ProgData, &ProgDataSize,
+ &Queue};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olCreateProgramWithCodeLoc(ol_device_handle_t Device,
+ void *ProgData, size_t ProgDataSize,
+ ol_program_handle_t *Queue,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olCreateProgram(Device, ProgData, ProgDataSize, Queue);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olRetainProgram_val(ol_program_handle_t Program) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Program) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+ }
+
+ return olRetainProgram_impl(Program);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olRetainProgram(ol_program_handle_t Program) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olRetainProgram";
+ }
+
+ ol_result_t Result = olRetainProgram_val(Program);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_retain_program_params_t Params = {&Program};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olRetainProgramWithCodeLoc(ol_program_handle_t Program,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olRetainProgram(Program);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olReleaseProgram_val(ol_program_handle_t Program) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Program) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+ }
+
+ return olReleaseProgram_impl(Program);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olReleaseProgram(ol_program_handle_t Program) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olReleaseProgram";
+ }
+
+ ol_result_t Result = olReleaseProgram_val(Program);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_release_program_params_t Params = {&Program};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olReleaseProgramWithCodeLoc(ol_program_handle_t Program,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olReleaseProgram(Program);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olCreateKernel_val(ol_program_handle_t Program,
+ const char *KernelName,
+ ol_kernel_handle_t *Kernel) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Program) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == KernelName) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+
+ if (NULL == Kernel) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olCreateKernel_impl(Program, KernelName, Kernel);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(ol_program_handle_t Program,
+ const char *KernelName,
+ ol_kernel_handle_t *Kernel) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olCreateKernel";
+ }
+
+ ol_result_t Result = olCreateKernel_val(Program, KernelName, Kernel);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_create_kernel_params_t Params = {&Program, &KernelName, &Kernel};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olCreateKernelWithCodeLoc(ol_program_handle_t Program,
+ const char *KernelName,
+ ol_kernel_handle_t *Kernel,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olCreateKernel(Program, KernelName, Kernel);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olRetainKernel_val(ol_kernel_handle_t Kernel) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Kernel) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+ }
+
+ return olRetainKernel_impl(Kernel);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(ol_kernel_handle_t Kernel) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olRetainKernel";
+ }
+
+ ol_result_t Result = olRetainKernel_val(Kernel);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_retain_kernel_params_t Params = {&Kernel};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olRetainKernelWithCodeLoc(ol_kernel_handle_t Kernel,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olRetainKernel(Kernel);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olReleaseKernel_val(ol_kernel_handle_t Kernel) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Kernel) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+ }
+
+ return olReleaseKernel_impl(Kernel);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernel(ol_kernel_handle_t Kernel) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olReleaseKernel";
+ }
+
+ ol_result_t Result = olReleaseKernel_val(Kernel);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_release_kernel_params_t Params = {&Kernel};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olReleaseKernelWithCodeLoc(ol_kernel_handle_t Kernel,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olReleaseKernel(Kernel);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olSetKernelArgValue_val(ol_kernel_handle_t Kernel,
+ uint32_t Index, size_t Size,
+ void *ArgData) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Kernel) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == ArgData) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olSetKernelArgValue_impl(Kernel, Index, Size, ArgData);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgValue(
+ ol_kernel_handle_t Kernel, uint32_t Index, size_t Size, void *ArgData) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olSetKernelArgValue";
+ }
+
+ ol_result_t Result = olSetKernelArgValue_val(Kernel, Index, Size, ArgData);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_set_kernel_arg_value_params_t Params = {&Kernel, &Index, &Size,
+ &ArgData};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olSetKernelArgValueWithCodeLoc(ol_kernel_handle_t Kernel,
+ uint32_t Index, size_t Size,
+ void *ArgData,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olSetKernelArgValue(Kernel, Index, Size, ArgData);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index 26120f18279dc..6f2bb34599a1d 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -22,6 +22,24 @@ OFFLOAD_FUNC(olGetDeviceInfo)
OFFLOAD_FUNC(olGetDeviceInfoSize)
OFFLOAD_FUNC(olMemAlloc)
OFFLOAD_FUNC(olMemFree)
+OFFLOAD_FUNC(olCreateQueue)
+OFFLOAD_FUNC(olRetainQueue)
+OFFLOAD_FUNC(olReleaseQueue)
+OFFLOAD_FUNC(olFinishQueue)
+OFFLOAD_FUNC(olRetainEvent)
+OFFLOAD_FUNC(olReleaseEvent)
+OFFLOAD_FUNC(olWaitEvent)
+OFFLOAD_FUNC(olEnqueueDataWrite)
+OFFLOAD_FUNC(olEnqueueDataRead)
+OFFLOAD_FUNC(olEnqueueDataCopy)
+OFFLOAD_FUNC(olEnqueueKernelLaunch)
+OFFLOAD_FUNC(olCreateProgram)
+OFFLOAD_FUNC(olRetainProgram)
+OFFLOAD_FUNC(olReleaseProgram)
+OFFLOAD_FUNC(olCreateKernel)
+OFFLOAD_FUNC(olRetainKernel)
+OFFLOAD_FUNC(olReleaseKernel)
+OFFLOAD_FUNC(olSetKernelArgValue)
OFFLOAD_FUNC(olInitWithCodeLoc)
OFFLOAD_FUNC(olShutDownWithCodeLoc)
OFFLOAD_FUNC(olGetPlatformWithCodeLoc)
@@ -34,5 +52,23 @@ OFFLOAD_FUNC(olGetDeviceInfoWithCodeLoc)
OFFLOAD_FUNC(olGetDeviceInfoSizeWithCodeLoc)
OFFLOAD_FUNC(olMemAllocWithCodeLoc)
OFFLOAD_FUNC(olMemFreeWithCodeLoc)
+OFFLOAD_FUNC(olCreateQueueWithCodeLoc)
+OFFLOAD_FUNC(olRetainQueueWithCodeLoc)
+OFFLOAD_FUNC(olReleaseQueueWithCodeLoc)
+OFFLOAD_FUNC(olFinishQueueWithCodeLoc)
+OFFLOAD_FUNC(olRetainEventWithCodeLoc)
+OFFLOAD_FUNC(olReleaseEventWithCodeLoc)
+OFFLOAD_FUNC(olWaitEventWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueDataWriteWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueDataReadWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueDataCopyWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueKernelLaunchWithCodeLoc)
+OFFLOAD_FUNC(olCreateProgramWithCodeLoc)
+OFFLOAD_FUNC(olRetainProgramWithCodeLoc)
+OFFLOAD_FUNC(olReleaseProgramWithCodeLoc)
+OFFLOAD_FUNC(olCreateKernelWithCodeLoc)
+OFFLOAD_FUNC(olRetainKernelWithCodeLoc)
+OFFLOAD_FUNC(olReleaseKernelWithCodeLoc)
+OFFLOAD_FUNC(olSetKernelArgValueWithCodeLoc)
#undef OFFLOAD_FUNC
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index f0a96081fd243..9d21d8fc97090 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -43,3 +43,57 @@ ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
void *Address);
+
+ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
+ ol_queue_handle_t *Queue);
+
+ol_impl_result_t olRetainQueue_impl(ol_queue_handle_t Queue);
+
+ol_impl_result_t olReleaseQueue_impl(ol_queue_handle_t Queue);
+
+ol_impl_result_t olFinishQueue_impl(ol_queue_handle_t Queue);
+
+ol_impl_result_t olRetainEvent_impl(ol_event_handle_t Event);
+
+ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event);
+
+ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event);
+
+ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *SrcPtr,
+ void *DstPtr, size_t Size,
+ ol_event_handle_t *EventOut);
+
+ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *SrcPtr,
+ void *DstPtr, size_t Size,
+ ol_event_handle_t *EventOut);
+
+ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue, void *SrcPtr,
+ void *DstPtr,
+ ol_device_handle_t DstDevice,
+ size_t Size,
+ ol_event_handle_t *EventOut);
+
+ol_impl_result_t olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue,
+ ol_kernel_handle_t Kernel,
+ const size_t *GlobalWorkSize,
+ ol_event_handle_t *EventOut);
+
+ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
+ size_t ProgDataSize,
+ ol_program_handle_t *Queue);
+
+ol_impl_result_t olRetainProgram_impl(ol_program_handle_t Program);
+
+ol_impl_result_t olReleaseProgram_impl(ol_program_handle_t Program);
+
+ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
+ const char *KernelName,
+ ol_kernel_handle_t *Kernel);
+
+ol_impl_result_t olRetainKernel_impl(ol_kernel_handle_t Kernel);
+
+ol_impl_result_t olReleaseKernel_impl(ol_kernel_handle_t Kernel);
+
+ol_impl_result_t olSetKernelArgValue_impl(ol_kernel_handle_t Kernel,
+ uint32_t Index, size_t Size,
+ void *ArgData);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index cff754237568e..698b422fc38d0 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -22,6 +22,10 @@ template <typename T> struct is_handle : std::false_type {};
template <> struct is_handle<ol_platform_handle_t> : std::true_type {};
template <> struct is_handle<ol_device_handle_t> : std::true_type {};
template <> struct is_handle<ol_context_handle_t> : std::true_type {};
+template <> struct is_handle<ol_queue_handle_t> : std::true_type {};
+template <> struct is_handle<ol_event_handle_t> : std::true_type {};
+template <> struct is_handle<ol_program_handle_t> : std::true_type {};
+template <> struct is_handle<ol_kernel_handle_t> : std::true_type {};
template <typename T> inline constexpr bool is_handle_v = is_handle<T>::value;
inline std::ostream &operator<<(std::ostream &os, enum ol_errc_t value);
@@ -455,6 +459,212 @@ inline std::ostream &operator<<(std::ostream &os,
return os;
}
+inline std::ostream &operator<<(std::ostream &os,
+ const struct ol_create_queue_params_t *params) {
+ os << ".Device = ";
+ printPtr(os, *params->pDevice);
+ os << ", ";
+ os << ".Queue = ";
+ printPtr(os, *params->pQueue);
+ return os;
+}
+
+inline std::ostream &operator<<(std::ostream &os,
+ const struct ol_retain_queue_params_t *params) {
+ os << ".Queue = ";
+ printPtr(os, *params->pQueue);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_release_queue_params_t *params) {
+ os << ".Queue = ";
+ printPtr(os, *params->pQueue);
+ return os;
+}
+
+inline std::ostream &operator<<(std::ostream &os,
+ const struct ol_finish_queue_params_t *params) {
+ os << ".Queue = ";
+ printPtr(os, *params->pQueue);
+ return os;
+}
+
+inline std::ostream &operator<<(std::ostream &os,
+ const struct ol_retain_event_params_t *params) {
+ os << ".Event = ";
+ printPtr(os, *params->pEvent);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_release_event_params_t *params) {
+ os << ".Event = ";
+ printPtr(os, *params->pEvent);
+ return os;
+}
+
+inline std::ostream &operator<<(std::ostream &os,
+ const struct ol_wait_event_params_t *params) {
+ os << ".Event = ";
+ printPtr(os, *params->pEvent);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os,
+ const struct ol_enqueue_data_write_params_t *params) {
+ os << ".Queue = ";
+ printPtr(os, *params->pQueue);
+ os << ", ";
+ os << ".SrcPtr = ";
+ printPtr(os, *params->pSrcPtr);
+ os << ", ";
+ os << ".DstPtr = ";
+ printPtr(os, *params->pDstPtr);
+ os << ", ";
+ os << ".Size = ";
+ os << *params->pSize;
+ os << ", ";
+ os << ".EventOut = ";
+ printPtr(os, *params->pEventOut);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os,
+ const struct ol_enqueue_data_read_params_t *params) {
+ os << ".Queue = ";
+ printPtr(os, *params->pQueue);
+ os << ", ";
+ os << ".SrcPtr = ";
+ printPtr(os, *params->pSrcPtr);
+ os << ", ";
+ os << ".DstPtr = ";
+ printPtr(os, *params->pDstPtr);
+ os << ", ";
+ os << ".Size = ";
+ os << *params->pSize;
+ os << ", ";
+ os << ".EventOut = ";
+ printPtr(os, *params->pEventOut);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os,
+ const struct ol_enqueue_data_copy_params_t *params) {
+ os << ".Queue = ";
+ printPtr(os, *params->pQueue);
+ os << ", ";
+ os << ".SrcPtr = ";
+ printPtr(os, *params->pSrcPtr);
+ os << ", ";
+ os << ".DstPtr = ";
+ printPtr(os, *params->pDstPtr);
+ os << ", ";
+ os << ".DstDevice = ";
+ printPtr(os, *params->pDstDevice);
+ os << ", ";
+ os << ".Size = ";
+ os << *params->pSize;
+ os << ", ";
+ os << ".EventOut = ";
+ printPtr(os, *params->pEventOut);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os,
+ const struct ol_enqueue_kernel_launch_params_t *params) {
+ os << ".Queue = ";
+ printPtr(os, *params->pQueue);
+ os << ", ";
+ os << ".Kernel = ";
+ printPtr(os, *params->pKernel);
+ os << ", ";
+ os << ".GlobalWorkSize = ";
+ printPtr(os, *params->pGlobalWorkSize);
+ os << ", ";
+ os << ".EventOut = ";
+ printPtr(os, *params->pEventOut);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_create_program_params_t *params) {
+ os << ".Device = ";
+ printPtr(os, *params->pDevice);
+ os << ", ";
+ os << ".ProgData = ";
+ printPtr(os, *params->pProgData);
+ os << ", ";
+ os << ".ProgDataSize = ";
+ os << *params->pProgDataSize;
+ os << ", ";
+ os << ".Queue = ";
+ printPtr(os, *params->pQueue);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_retain_program_params_t *params) {
+ os << ".Program = ";
+ printPtr(os, *params->pProgram);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_release_program_params_t *params) {
+ os << ".Program = ";
+ printPtr(os, *params->pProgram);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_create_kernel_params_t *params) {
+ os << ".Program = ";
+ printPtr(os, *params->pProgram);
+ os << ", ";
+ os << ".KernelName = ";
+ printPtr(os, *params->pKernelName);
+ os << ", ";
+ os << ".Kernel = ";
+ printPtr(os, *params->pKernel);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_retain_kernel_params_t *params) {
+ os << ".Kernel = ";
+ printPtr(os, *params->pKernel);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_release_kernel_params_t *params) {
+ os << ".Kernel = ";
+ printPtr(os, *params->pKernel);
+ return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os,
+ const struct ol_set_kernel_arg_value_params_t *params) {
+ os << ".Kernel = ";
+ printPtr(os, *params->pKernel);
+ os << ", ";
+ os << ".Index = ";
+ os << *params->pIndex;
+ os << ", ";
+ os << ".Size = ";
+ os << *params->pSize;
+ os << ", ";
+ os << ".ArgData = ";
+ printPtr(os, *params->pArgData);
+ return os;
+}
+
///////////////////////////////////////////////////////////////////////////////
// @brief Print pointer value
template <typename T>
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 3e609ed03917f..d1c72ecced875 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -34,6 +34,117 @@ struct ol_platform_handle_t_ {
std::vector<ol_device_handle_t_> Devices;
};
+struct ol_queue_handle_t_ { // State behind an ol_queue_handle_t.
+ __tgt_async_info *AsyncInfo; // Filled in by the device's initAsyncInfo().
+ ol_device_handle_t Device; // Device the queue was created on.
+ std::atomic_uint32_t RefCount; // Set to 1 on creation; retain/release adjust it.
+};
+
+struct ol_event_handle_t_ { // State behind an ol_event_handle_t.
+ void *EventInfo; // Opaque plugin event; handed to syncEvent() when waiting.
+ ol_queue_handle_t Queue; // Presumably the originating queue - TODO confirm.
+ ol_device_handle_t Device; // Device whose plugin is asked to sync EventInfo.
+ std::atomic_uint32_t RefCount; // Adjusted by olRetainEvent/olReleaseEvent.
+};
+
+struct ol_program_handle_t_ { // State behind an ol_program_handle_t.
+ llvm::omp::target::plugin::DeviceImageTy *Image; // Plugin-side device image.
+ std::atomic_uint32_t RefCount; // Presumably managed by olRetain/ReleaseProgram.
+};
+
+/// Packed storage for the arguments of a kernel launch.
+///
+/// Argument values are copied back-to-back into a fixed-size byte buffer
+/// (Storage). Indices holds one pointer per argument into that buffer,
+/// followed by a trailing pointer to the implicit offset triple.
+///
+/// NOTE: Indices points into this object's own Storage and
+/// ImplicitOffsetArgs, so a copied or moved instance would still refer to the
+/// object it was copied from.
+///
+/// NOTE(review): replacing an existing argument with one of a different size
+/// changes where later arguments are expected to start, but arguments that
+/// are already stored are not relocated - confirm callers set arguments in
+/// ascending index order or never change an argument's size.
+struct OffloadArguments {
+  static constexpr size_t MaxParamBytes = 4000u;
+  using args_t = std::array<char, MaxParamBytes>;
+  using args_size_t = std::vector<size_t>;
+  using args_index_t = std::vector<void *>;
+  /// Raw bytes of all arguments, laid out in index order.
+  args_t Storage;
+  /// Size in bytes of each argument (0 for gaps).
+  args_size_t ParamSizes;
+  /// Pointer to each argument's bytes; the last entry is always the implicit
+  /// offset.
+  args_index_t Indices;
+  /// Local-memory bytes attributed to each argument (0 for non-local ones).
+  args_size_t OffsetPerIndex;
+
+  std::uint32_t ImplicitOffsetArgs[3] = {0, 0, 0};
+
+  // NOTE:
+  // This implementation is derived from the CUDA adapter's argument
+  // implementation. Even though it was designed for CUDA, the design of
+  // libomptarget means it should work for other plugins as they will expect
+  // the same argument layout.
+  OffloadArguments() {
+    // Place the implicit offset index at the end of the indices collection
+    Indices.emplace_back(&ImplicitOffsetArgs);
+  }
+
+  /// Add an argument to the kernel.
+  /// If the argument existed before, it is replaced.
+  /// Otherwise, it is added.
+  /// Gaps are filled with empty arguments.
+  /// Implicit offset argument is kept at the back of the indices collection.
+  ///
+  /// \returns true if the argument was stored; false, leaving the object
+  /// unchanged, if it would not fit within MaxParamBytes.
+  bool addArg(size_t Index, size_t Size, const void *Arg,
+              size_t LocalSize = 0) {
+    // The insertion point is the combined size of all lower-indexed
+    // arguments. Indices past the current end contribute nothing because gaps
+    // are empty. Accumulate in size_t so large totals are not narrowed.
+    const size_t Preceding = std::min(Index, ParamSizes.size());
+    const size_t InsertPos =
+        std::accumulate(std::begin(ParamSizes),
+                        std::begin(ParamSizes) + Preceding, size_t{0});
+    // Refuse anything that would overrun the fixed-size buffer.
+    if (InsertPos > MaxParamBytes || Size > MaxParamBytes - InsertPos) {
+      return false;
+    }
+
+    if (Index + 2 > Indices.size()) {
+      // Move implicit offset argument index with the end
+      Indices.resize(Index + 2, Indices.back());
+      // Ensure enough space for the new argument
+      ParamSizes.resize(Index + 1);
+      OffsetPerIndex.resize(Index + 1);
+    }
+    ParamSizes[Index] = Size;
+    // Update the stored value for the argument
+    if (Size != 0) {
+      std::memcpy(Storage.data() + InsertPos, Arg, Size);
+    }
+    Indices[Index] = Storage.data() + InsertPos;
+    OffsetPerIndex[Index] = LocalSize;
+    return true;
+  }
+
+  /// Add a local-memory argument of \p Size bytes at \p Index.
+  /// The value stored for the argument is its aligned byte offset within the
+  /// accumulated local size; the padding plus \p Size is recorded as the
+  /// argument's local size.
+  ///
+  /// \returns the result of the underlying addArg() call.
+  bool addLocalArg(size_t Index, size_t Size) {
+    size_t LocalOffset = this->getLocalSize();
+
+    // maximum required alignment is the size of the largest vector type
+    const size_t MaxAlignment = sizeof(double) * 16;
+
+    // for arguments smaller than the maximum alignment simply align to the
+    // size of the argument
+    const size_t Alignment = std::min(MaxAlignment, Size);
+
+    // align the argument; a zero-sized argument needs no padding and must not
+    // be used as a modulus
+    size_t AlignedLocalOffset = LocalOffset;
+    if (Alignment != 0) {
+      size_t Pad = LocalOffset % Alignment;
+      if (Pad != 0) {
+        AlignedLocalOffset += Alignment - Pad;
+      }
+    }
+
+    return addArg(Index, sizeof(size_t), &AlignedLocalOffset,
+                  Size + (AlignedLocalOffset - LocalOffset));
+  }
+
+  /// Overwrite the implicit offset triple with \p Size bytes read from
+  /// \p ImplicitOffset. \p Size is expected to be three 32-bit values.
+  void setImplicitOffset(size_t Size, std::uint32_t *ImplicitOffset) {
+    assert(Size == sizeof(std::uint32_t) * 3);
+    // Never copy more than the destination holds, even when asserts are
+    // compiled out.
+    std::memcpy(ImplicitOffsetArgs, ImplicitOffset,
+                std::min(Size, sizeof(ImplicitOffsetArgs)));
+  }
+
+  /// Reset the local size recorded for every argument to zero.
+  void clearLocalSize() {
+    std::fill(std::begin(OffsetPerIndex), std::end(OffsetPerIndex), 0);
+  }
+
+  const args_index_t &getIndices() const noexcept { return Indices; }
+
+  /// Sum of the local sizes recorded for all arguments.
+  uint32_t getLocalSize() const {
+    return static_cast<uint32_t>(std::accumulate(
+        std::begin(OffsetPerIndex), std::end(OffsetPerIndex), size_t{0}));
+  }
+
+  const char *getStorage() const noexcept { return Storage.data(); }
+};
+
+struct ol_kernel_handle_t_ { // State behind an ol_kernel_handle_t.
+ ol_program_handle_t Program; // Presumably the owning program - TODO confirm.
+ std::atomic_uint32_t RefCount; // Presumably managed by olRetain/ReleaseKernel.
+ GenericKernelTy *KernelImpl; // Plugin-side kernel object.
+ OffloadArguments Args; // Argument storage for this kernel.
+};
+
using PlatformVecT = SmallVector<ol_platform_handle_t_, 4>;
PlatformVecT &Platforms() {
static PlatformVecT Platforms;
@@ -280,3 +391,252 @@ ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
}
return OL_SUCCESS;
}
+
+/// Create a queue on \p Device and return it through \p Queue with a
+/// reference count of 1.
+///
+/// \returns OL_ERRC_OUT_OF_RESOURCES if the device cannot initialise the
+/// queue's async info, OL_SUCCESS otherwise.
+ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
+                                    ol_queue_handle_t *Queue) {
+  auto CreatedQueue = std::make_unique<ol_queue_handle_t_>();
+  if (auto Err = Device->Device.initAsyncInfo(&(CreatedQueue->AsyncInfo))) {
+    // A failed llvm::Error must be consumed before it goes out of scope.
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+  CreatedQueue->Device = Device;
+  CreatedQueue->RefCount = 1;
+  // Ownership passes to the caller's handle.
+  *Queue = CreatedQueue.release();
+  return OL_SUCCESS;
+}
+
+/// Take an additional reference on \p Queue.
+ol_impl_result_t olRetainQueue_impl(ol_queue_handle_t Queue) {
+  ++Queue->RefCount;
+  return OL_SUCCESS;
+}
+
+/// Drop one reference on \p Queue and free the handle when the last
+/// reference is gone, matching olReleaseProgram_impl / olReleaseKernel_impl.
+ol_impl_result_t olReleaseQueue_impl(ol_queue_handle_t Queue) {
+  if (--Queue->RefCount == 0) {
+    // NOTE(review): only the handle object is freed here; confirm whether the
+    // queue's AsyncInfo also needs to be released through the plugin.
+    delete Queue;
+  }
+  return OL_SUCCESS;
+}
+
+/// Synchronize \p Queue with the device, then re-initialise its async info so
+/// the queue can be used again.
+///
+/// \returns OL_ERRC_OUT_OF_RESOURCES if either step fails.
+ol_impl_result_t olFinishQueue_impl(ol_queue_handle_t Queue) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  // Host plugin doesn't have a queue set so it's not safe to call synchronize
+  // on it, but we have nothing to synchronize in that situation anyway.
+  if (Queue->AsyncInfo->Queue) {
+    if (auto Err = DeviceImpl.synchronize(Queue->AsyncInfo)) {
+      // A failed llvm::Error must be consumed before it goes out of scope.
+      llvm::consumeError(std::move(Err));
+      return OL_ERRC_OUT_OF_RESOURCES;
+    }
+  }
+
+  // Recreate the stream resource so the queue can be reused
+  // TODO: Would be easier for the synchronization to (optionally) not release
+  // it to begin with.
+  if (auto Err = DeviceImpl.initAsyncInfo(&Queue->AsyncInfo)) {
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  return OL_SUCCESS;
+}
+
+/// Ask the event's device to synchronize on \p Event.
+///
+/// \returns OL_ERRC_OUT_OF_RESOURCES if the device reports a failure.
+ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event) {
+  if (auto Err = Event->Device->Device.syncEvent(Event->EventInfo)) {
+    // A failed llvm::Error must be consumed before it goes out of scope.
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+  return OL_SUCCESS;
+}
+
+/// Take an additional reference on \p Event.
+ol_impl_result_t olRetainEvent_impl(ol_event_handle_t Event) {
+  ++Event->RefCount;
+  return OL_SUCCESS;
+}
+
+/// Drop one reference on \p Event and free the handle when the last
+/// reference is gone, matching olReleaseProgram_impl / olReleaseKernel_impl.
+ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event) {
+  if (--Event->RefCount == 0) {
+    // NOTE(review): only the handle object is freed here; the plugin event
+    // held in EventInfo is not destroyed - confirm how it should be released.
+    delete Event;
+  }
+  return OL_SUCCESS;
+}
+
+/// Create an event on the device owning \p Queue and record it on the queue.
+///
+/// \returns the new event handle with a reference count of 1, or nullptr if
+/// the device failed to create or record the event.
+ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  auto EventImpl = std::make_unique<ol_event_handle_t_>();
+  EventImpl->Queue = Queue;
+  // olWaitEvent_impl reaches the plugin through Event->Device, so it has to
+  // be populated here as well.
+  EventImpl->Device = Queue->Device;
+  // The caller receives the first reference.
+  EventImpl->RefCount = 1;
+
+  if (auto Err = DeviceImpl.createEvent(&EventImpl->EventInfo)) {
+    // A failed llvm::Error must be consumed before it goes out of scope.
+    llvm::consumeError(std::move(Err));
+    return nullptr;
+  }
+
+  if (auto Err = DeviceImpl.recordEvent(EventImpl->EventInfo, Queue->AsyncInfo)) {
+    // NOTE(review): the plugin event created above is not destroyed on this
+    // path; only the handle object is freed.
+    llvm::consumeError(std::move(Err));
+    return nullptr;
+  }
+
+  return EventImpl.release();
+}
+
+/// Submit a transfer of \p Size bytes from \p SrcPtr to \p DstPtr on \p Queue
+/// using the device's dataSubmit.
+///
+/// If \p EventOut is non-null it receives the result of makeEvent(), which
+/// may be nullptr when event creation fails.
+ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *SrcPtr,
+                                         void *DstPtr, size_t Size,
+                                         ol_event_handle_t *EventOut) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  if (auto Err = DeviceImpl.dataSubmit(DstPtr, SrcPtr, Size, Queue->AsyncInfo)) {
+    // A failed llvm::Error must be consumed before it goes out of scope.
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  if (EventOut)
+    *EventOut = makeEvent(Queue);
+
+  return OL_SUCCESS;
+}
+
+/// Submit a transfer of \p Size bytes from \p SrcPtr to \p DstPtr on \p Queue
+/// using the device's dataRetrieve.
+///
+/// If \p EventOut is non-null it receives the result of makeEvent(), which
+/// may be nullptr when event creation fails.
+ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *SrcPtr,
+                                        void *DstPtr, size_t Size,
+                                        ol_event_handle_t *EventOut) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  if (auto Err =
+          DeviceImpl.dataRetrieve(DstPtr, SrcPtr, Size, Queue->AsyncInfo)) {
+    // A failed llvm::Error must be consumed before it goes out of scope.
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  if (EventOut)
+    *EventOut = makeEvent(Queue);
+
+  return OL_SUCCESS;
+}
+
+/// Submit a copy of \p Size bytes from \p SrcPtr on the queue's device to
+/// \p DstPtr on \p DstDevice, using the device's dataExchange.
+///
+/// If \p EventOut is non-null it receives the result of makeEvent(), which
+/// may be nullptr when event creation fails.
+ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue, void *SrcPtr,
+                                        void *DstPtr,
+                                        ol_device_handle_t DstDevice,
+                                        size_t Size,
+                                        ol_event_handle_t *EventOut) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  if (auto Err = DeviceImpl.dataExchange(SrcPtr, DstDevice->Device, DstPtr,
+                                         Size, Queue->AsyncInfo)) {
+    // A failed llvm::Error must be consumed before it goes out of scope.
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  if (EventOut)
+    *EventOut = makeEvent(Queue);
+
+  return OL_SUCCESS;
+}
+
+/// Load the binary at \p ProgData (\p ProgDataSize bytes) on \p Device and
+/// return a program handle with a reference count of 1 through \p Program.
+///
+/// \returns OL_ERRC_INVALID_VALUE if the device rejects the binary.
+ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
+                                      size_t ProgDataSize,
+                                      ol_program_handle_t *Program) {
+
+  // NOTE(review): the image end is set to the last byte (Start + Size - 1);
+  // confirm whether the plugin expects a one-past-the-end pointer instead.
+  // NOTE(review): DeviceImage lives on this stack frame; confirm the plugin
+  // does not keep the pointer passed to loadBinary after it returns.
+  __tgt_device_image DeviceImage{
+      ProgData, ((char *)ProgData) + ProgDataSize - 1, nullptr, nullptr};
+
+  auto Res = Device->Device.loadBinary(Device->Device.Plugin, &DeviceImage);
+  if (!Res) {
+    // A failed llvm::Expected must have its error consumed before it goes out
+    // of scope.
+    llvm::consumeError(Res.takeError());
+    return OL_ERRC_INVALID_VALUE;
+  }
+
+  ol_program_handle_t Prog = new ol_program_handle_t_();
+  Prog->Image = *Res;
+  Prog->RefCount = 1;
+  *Program = Prog;
+
+  return OL_SUCCESS;
+}
+
+/// Take an additional reference on \p Program.
+ol_impl_result_t olRetainProgram_impl(ol_program_handle_t Program) {
+  Program->RefCount.fetch_add(1);
+  return OL_SUCCESS;
+}
+
+/// Drop one reference on \p Program; the handle is deleted when the count
+/// reaches zero.
+ol_impl_result_t olReleaseProgram_impl(ol_program_handle_t Program) {
+  // fetch_sub returns the previous value, so 1 means this was the last
+  // reference.
+  const bool WasLastReference = Program->RefCount.fetch_sub(1) == 1;
+  if (WasLastReference)
+    delete Program;
+  return OL_SUCCESS;
+}
+
+/// Construct and initialise the kernel named \p KernelName from \p Program
+/// and return a kernel handle with a reference count of 1 through \p Kernel.
+///
+/// \returns OL_ERRC_OUT_OF_RESOURCES if the kernel cannot be constructed or
+/// initialised.
+ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
+                                     const char *KernelName,
+                                     ol_kernel_handle_t *Kernel) {
+
+  auto &Device = Program->Image->getDevice();
+  auto KernelImpl = Device.constructKernel(KernelName);
+  if (!KernelImpl) {
+    // A failed llvm::Expected must have its error consumed before it goes out
+    // of scope.
+    llvm::consumeError(KernelImpl.takeError());
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  if (auto Err = KernelImpl->init(Device, *Program->Image)) {
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  ol_kernel_handle_t CreatedKernel = new ol_kernel_handle_t_();
+  CreatedKernel->Program = Program;
+  CreatedKernel->RefCount = 1;
+  // Only the address of the plugin's kernel object is stored.
+  CreatedKernel->KernelImpl = &*KernelImpl;
+  *Kernel = CreatedKernel;
+
+  return OL_SUCCESS;
+}
+
+/// Take an additional reference on \p Kernel.
+ol_impl_result_t olRetainKernel_impl(ol_kernel_handle_t Kernel) {
+  Kernel->RefCount.fetch_add(1);
+  return OL_SUCCESS;
+}
+
+/// Drop one reference on \p Kernel; the handle is deleted when the count
+/// reaches zero.
+ol_impl_result_t olReleaseKernel_impl(ol_kernel_handle_t Kernel) {
+  // fetch_sub returns the previous value, so 1 means this was the last
+  // reference.
+  const bool WasLastReference = Kernel->RefCount.fetch_sub(1) == 1;
+  if (WasLastReference)
+    delete Kernel;
+  return OL_SUCCESS;
+}
+
+/// Store \p Size bytes read from \p ArgData as argument \p Index of
+/// \p Kernel. Always returns OL_SUCCESS.
+ol_impl_result_t olSetKernelArgValue_impl(ol_kernel_handle_t Kernel,
+                                          uint32_t Index, size_t Size,
+                                          void *ArgData) {
+  auto &KernelArgs = Kernel->Args;
+  KernelArgs.addArg(Index, Size, ArgData);
+  return OL_SUCCESS;
+}
+
+/// Launch \p Kernel on \p Queue with GlobalWorkSize[0] teams of one thread
+/// each, using the arguments previously stored on the kernel.
+///
+/// If \p EventOut is non-null it receives the result of makeEvent(), which
+/// may be nullptr when event creation fails.
+///
+/// \returns OL_ERRC_OUT_OF_RESOURCES if the launch fails.
+ol_impl_result_t olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue,
+                                            ol_kernel_handle_t Kernel,
+                                            const size_t *GlobalWorkSize,
+                                            ol_event_handle_t *EventOut) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  AsyncInfoWrapperTy AsyncInfoWrapper(DeviceImpl, Queue->AsyncInfo);
+
+  KernelArgsTy LaunchArgs{};
+  // The last entry of the index table is the implicit offset argument and is
+  // not counted.
+  LaunchArgs.NumArgs = Kernel->Args.getIndices().size() - 1; // TODO
+  // Only the first dimension of the global work size is used.
+  LaunchArgs.NumTeams[0] = GlobalWorkSize[0];
+  LaunchArgs.NumTeams[1] = 1;
+  LaunchArgs.NumTeams[2] = 1;
+  LaunchArgs.ThreadLimit[0] = 1;
+  LaunchArgs.ThreadLimit[1] = 1;
+  LaunchArgs.ThreadLimit[2] = 1;
+
+  // NOTE(review): the packed argument bytes are passed where an array of
+  // argument pointers is expected - confirm this matches what launch() reads.
+  LaunchArgs.ArgPtrs = (void **)Kernel->Args.getStorage();
+
+  // TODO: Verify this
+  auto ArgOffsets = std::vector<ptrdiff_t>(LaunchArgs.NumArgs, 0ul);
+
+  auto Err = Kernel->KernelImpl->launch(
+      DeviceImpl, (void **)Kernel->Args.getStorage(), ArgOffsets.data(),
+      LaunchArgs, AsyncInfoWrapper);
+
+  AsyncInfoWrapper.finalize(Err);
+  if (Err) {
+    // A failed llvm::Error must be consumed before it goes out of scope.
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  if (EventOut)
+    *EventOut = makeEvent(Queue);
+
+  return OL_SUCCESS;
+}
diff --git a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h
index d2914e7cd0eb4..d65fceb8508d2 100644
--- a/offload/plugins-nextgen/common/include/GlobalHandler.h
+++ b/offload/plugins-nextgen/common/include/GlobalHandler.h
@@ -131,8 +131,9 @@ class GenericGlobalHandlerTy {
/// Get the address and size of a global in the image. Address and size are
/// return in \p ImageGlobal, the global name is passed in \p ImageGlobal.
- Error getGlobalMetadataFromImage(GenericDeviceTy &Device,
- DeviceImageTy &Image, GlobalTy &ImageGlobal);
+ virtual Error getGlobalMetadataFromImage(GenericDeviceTy &Device,
+ DeviceImageTy &Image,
+ GlobalTy &ImageGlobal);
/// Read the memory associated with a global from the image and store it on
/// the host. The name, size, and destination are defined by \p HostGlobal.
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 894d1c2214b97..d7a69091ada74 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1327,6 +1327,34 @@ class CUDAGlobalHandlerTy final : public GenericGlobalHandlerTy {
DeviceGlobal.setPtr(reinterpret_cast<void *>(CUPtr));
return Plugin::success();
}
+
+  /// Look up the global named by \p ImageGlobal in \p Image and store its
+  /// address and size back into \p ImageGlobal.
+  Error getGlobalMetadataFromImage(GenericDeviceTy &Device,
+                                   DeviceImageTy &Image,
+                                   GlobalTy &ImageGlobal) override {
+    // ELF images go through the generic handler; any other image is queried
+    // with cuModuleGetGlobal.
+    if (utils::elf::isELF(Image.getMemoryBuffer().getBuffer()))
+      return GenericGlobalHandlerTy::getGlobalMetadataFromImage(Device, Image,
+                                                                ImageGlobal);
+
+    auto &ModuleImage = static_cast<CUDADeviceImageTy &>(Image);
+    const char *SymbolName = ImageGlobal.getName().data();
+
+    CUdeviceptr DevicePtr;
+    size_t ByteSize;
+    CUresult Status = cuModuleGetGlobal(&DevicePtr, &ByteSize,
+                                        ModuleImage.getModule(), SymbolName);
+    if (auto Err = Plugin::check(
+            Status, "Error in cuModuleGetGlobal for '%s': %s", SymbolName))
+      return Err;
+
+    // Record the symbol's address and size on the global.
+    ImageGlobal.setPtr(reinterpret_cast<void *>(DevicePtr));
+    ImageGlobal.setSize(ByteSize);
+    return Plugin::success();
+  }
};
/// Class implementing the CUDA-specific functionalities of the plugin.
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 915c41e88c582..1ba9a49f4f9af 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -287,9 +287,9 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
return Plugin::success();
}
- /// This plugin does not support interoperability
+ /// This plugin does not support interoperability, do nothing
Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override {
- return Plugin::error("initAsyncInfoImpl not supported");
+ return Plugin::success();
}
/// This plugin does not support interoperability
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index 033ee2b6ec746..e0d790684898d 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -10,7 +10,9 @@ add_libompt_unittest("offload.unittests"
${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDevice.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceCount.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfo.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfoSize.cpp)
+ ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfoSize.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/queue/olCreateQueue.cpp
+ )
add_dependencies("offload.unittests" ${PLUGINS_TEST_COMMON})
target_link_libraries("offload.unittests" PRIVATE ${PLUGINS_TEST_COMMON})
target_include_directories("offload.unittests" PRIVATE ${PLUGINS_TEST_INCLUDE})
diff --git a/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp b/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp
new file mode 100644
index 0000000000000..f542dac4bb2d8
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp
@@ -0,0 +1,19 @@
+//===------- Offload API tests - olCreateQueue ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olCreateQueueTest = offloadDeviceTest;
+
+// Creating a queue on a valid device succeeds and yields a non-null handle.
+TEST_F(olCreateQueueTest, Success) {
+  ol_queue_handle_t Queue = nullptr;
+  ASSERT_SUCCESS(olCreateQueue(Device, &Queue));
+  ASSERT_NE(Queue, nullptr);
+  // Give back the reference taken by olCreateQueue.
+  ASSERT_SUCCESS(olReleaseQueue(Queue));
+}
>From f6430fef9a169a19752b7af11e2adccc701d8325 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Thu, 30 Jan 2025 15:46:15 +0000
Subject: [PATCH 04/16] Make a copy of the program binary in olCreateProgram
---
offload/liboffload/src/OffloadImpl.cpp | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index d1c72ecced875..1fbb424021eef 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -49,6 +49,7 @@ struct ol_event_handle_t_ {
struct ol_program_handle_t_ {
llvm::omp::target::plugin::DeviceImageTy *Image;
+ std::unique_ptr<MemoryBuffer> ImageData;
std::atomic_uint32_t RefCount;
};
@@ -531,17 +532,22 @@ ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue, void *SrcPtr,
ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
size_t ProgDataSize,
ol_program_handle_t *Program) {
+ auto ImageData = MemoryBuffer::getMemBufferCopy(
+ StringRef(reinterpret_cast<char *>(ProgData), ProgDataSize));
+ __tgt_device_image DeviceImage{(char *) ImageData->getBuffer().data(),
+ ((char *)ImageData->getBuffer().data()) +
+ ProgDataSize - 1,
+ nullptr, nullptr};
- __tgt_device_image DeviceImage{
- ProgData, ((char *)ProgData) + ProgDataSize - 1, nullptr, nullptr};
+ ol_program_handle_t Prog = new ol_program_handle_t_();
auto Res = Device->Device.loadBinary(Device->Device.Plugin, &DeviceImage);
if (!Res)
return OL_ERRC_INVALID_VALUE;
- ol_program_handle_t Prog = new ol_program_handle_t_();
Prog->Image = *Res;
Prog->RefCount = 1;
+ Prog->ImageData = std::move(ImageData);
*Program = Prog;
return OL_SUCCESS;
>From fb8a1cca359a8a5c35eda4c806e90c0799a067a1 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Mon, 3 Feb 2025 10:28:41 +0000
Subject: [PATCH 05/16] Rework kernel arguments
---
offload/liboffload/API/Kernel.td | 26 +++++-
.../liboffload/include/generated/OffloadAPI.h | 50 +++++++++-
.../include/generated/OffloadEntryPoints.inc | 44 +++++++++
.../include/generated/OffloadFuncs.inc | 2 +
.../generated/OffloadImplFuncDecls.inc | 3 +
.../include/generated/OffloadPrint.hpp | 14 +++
offload/liboffload/src/OffloadImpl.cpp | 91 +++++--------------
7 files changed, 156 insertions(+), 74 deletions(-)
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index 936372c18ca37..4c8c84e9c71de 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -12,7 +12,7 @@ def : Function {
def : Function {
let name = "olRetainKernel";
- let desc = "Create a queue for the given device";
+ let desc = "Increment the reference count of the given kernel";
let details = [];
let params = [
Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
@@ -22,7 +22,7 @@ def : Function {
def : Function {
let name = "olReleaseKernel";
- let desc = "Create a queue for the given device";
+ let desc = "Decrement the reference count of the given kernel";
let details = [];
let params = [
Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
@@ -32,8 +32,11 @@ def : Function {
def : Function {
let name = "olSetKernelArgValue";
- let desc = "Create a queue for the given device";
- let details = [];
+ let desc = "Set the value of a single kernel argument at the given index";
+ let details = [
+ "The implementation will construct and lay out the backing storage for the kernel arguments.",
+ "The effects of calls to this function on a kernel are lost if olSetKernelArgsData is called."
+ ];
let params = [
Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
Param<"uint32_t", "Index", "index of the argument", PARAM_IN>,
@@ -42,3 +45,18 @@ def : Function {
];
let returns = [];
}
+
+// Entry point that replaces a kernel's whole argument buffer in one call.
+def : Function {
+  let name = "olSetKernelArgsData";
+  let desc = "Set the entire argument data for a kernel";
+  // Each detail is a separate list element; without the separating comma the
+  // adjacent string literals are joined into a single run-together sentence.
+  let details = [
+    "Previous calls to olSetKernelArgValue on the same kernel are invalidated by this function.",
+    "The data pointed to by ArgsData is assumed to be laid out correctly according to the requirements of the backend API."
+  ];
+  let params = [
+    Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
+    Param<"void*", "ArgsData", "pointer to the argument data", PARAM_IN>,
+    Param<"size_t", "ArgsDataSize", "size of the argument data", PARAM_IN>
+  ];
+  let returns = [];
+}
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 2384de19ae72e..155e31338c88b 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -838,7 +838,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(
ol_kernel_handle_t *Kernel);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Create a queue for the given device
+/// @brief Increment the reference count of the given kernel
///
/// @details
///
@@ -854,7 +854,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(
ol_kernel_handle_t Kernel);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Create a queue for the given device
+/// @brief Decrement the reference count of the given kernel
///
/// @details
///
@@ -870,9 +870,12 @@ OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernel(
ol_kernel_handle_t Kernel);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Create a queue for the given device
+/// @brief Set the value of a single kernel argument at the given index
///
/// @details
+/// - The implementation will construct and lay out the backing storage for
+/// the kernel arguments. The effects of calls to this function on a kernel
+/// are lost if olSetKernelArgsData is called.
///
/// @returns
/// - ::OL_RESULT_SUCCESS
@@ -892,6 +895,30 @@ OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgValue(
// [in] pointer to the argument data
void *ArgData);
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Set the entire argument data for a kernel
+///
+/// @details
+/// - Previous calls to olSetKernelArgValue on the same kernel are
+/// invalidated by this function. The data pointed to by ArgsData is assumed to
+/// be laid out correctly according to the requirements of the backend API.
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Kernel`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == ArgsData`
+OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgsData(
+ // [in] handle of the kernel
+ ol_kernel_handle_t Kernel,
+ // [in] pointer to the argument data
+ void *ArgsData,
+ // [in] size of the argument data
+ size_t ArgsDataSize);
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for olGetPlatform
/// @details Each entry is a pointer to the parameter passed to the function;
@@ -1133,6 +1160,15 @@ typedef struct ol_set_kernel_arg_value_params_t {
void **pArgData;
} ol_set_kernel_arg_value_params_t;
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olSetKernelArgsData
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_set_kernel_args_data_params_t {
+ ol_kernel_handle_t *pKernel;
+ void **pArgsData;
+ size_t *pArgsDataSize;
+} ol_set_kernel_args_data_params_t;
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Variant of olInit that also sets source code location information
/// @details See also ::olInit
@@ -1358,6 +1394,14 @@ OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgValueWithCodeLoc(
ol_kernel_handle_t Kernel, uint32_t Index, size_t Size, void *ArgData,
ol_code_location_t *CodeLocation);
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olSetKernelArgsData that also sets source code location
+/// information
+/// @details See also ::olSetKernelArgsData
+OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgsDataWithCodeLoc(
+ ol_kernel_handle_t Kernel, void *ArgsData, size_t ArgsDataSize,
+ ol_code_location_t *CodeLocation);
+
#if defined(__cplusplus)
} // extern "C"
#endif
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 0ae3c36f95827..fd022795a5d40 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -1311,3 +1311,47 @@ ol_result_t olSetKernelArgValueWithCodeLoc(ol_kernel_handle_t Kernel,
currentCodeLocation() = nullptr;
return Result;
}
+
+///////////////////////////////////////////////////////////////////////////////
+// Validation layer for olSetKernelArgsData: rejects a null Kernel handle or a
+// null ArgsData pointer, then forwards to olSetKernelArgsData_impl.
+// ArgsDataSize is passed through unchecked.
+ol_impl_result_t olSetKernelArgsData_val(ol_kernel_handle_t Kernel,
+ void *ArgsData, size_t ArgsDataSize) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Kernel) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == ArgsData) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olSetKernelArgsData_impl(Kernel, ArgsData, ArgsDataSize);
+}
+// Public entry point: runs the validation layer and, when tracing is enabled
+// in offloadConfig(), prints the call name, its parameters, the result and
+// any error details to std::cout.
+OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgsData(
+ ol_kernel_handle_t Kernel, void *ArgsData, size_t ArgsDataSize) {
+ if (offloadConfig().TracingEnabled) {
+ std::cout << "---> olSetKernelArgsData";
+ }
+
+ ol_result_t Result = olSetKernelArgsData_val(Kernel, ArgsData, ArgsDataSize);
+
+ if (offloadConfig().TracingEnabled) {
+ // The params struct holds pointers to the arguments so they can be printed
+ // through its operator<< overload.
+ ol_set_kernel_args_data_params_t Params = {&Kernel, &ArgsData,
+ &ArgsDataSize};
+ std::cout << "(" << &Params << ")";
+ std::cout << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cout << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+// Variant of olSetKernelArgsData that sets currentCodeLocation() to
+// CodeLocation for the duration of the call and resets it to nullptr
+// afterwards.
+ol_result_t olSetKernelArgsDataWithCodeLoc(ol_kernel_handle_t Kernel,
+ void *ArgsData, size_t ArgsDataSize,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olSetKernelArgsData(Kernel, ArgsData, ArgsDataSize);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index 6f2bb34599a1d..05a8e47251254 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -40,6 +40,7 @@ OFFLOAD_FUNC(olCreateKernel)
OFFLOAD_FUNC(olRetainKernel)
OFFLOAD_FUNC(olReleaseKernel)
OFFLOAD_FUNC(olSetKernelArgValue)
+OFFLOAD_FUNC(olSetKernelArgsData)
OFFLOAD_FUNC(olInitWithCodeLoc)
OFFLOAD_FUNC(olShutDownWithCodeLoc)
OFFLOAD_FUNC(olGetPlatformWithCodeLoc)
@@ -70,5 +71,6 @@ OFFLOAD_FUNC(olCreateKernelWithCodeLoc)
OFFLOAD_FUNC(olRetainKernelWithCodeLoc)
OFFLOAD_FUNC(olReleaseKernelWithCodeLoc)
OFFLOAD_FUNC(olSetKernelArgValueWithCodeLoc)
+OFFLOAD_FUNC(olSetKernelArgsDataWithCodeLoc)
#undef OFFLOAD_FUNC
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 9d21d8fc97090..9401b20f97c11 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -97,3 +97,6 @@ ol_impl_result_t olReleaseKernel_impl(ol_kernel_handle_t Kernel);
ol_impl_result_t olSetKernelArgValue_impl(ol_kernel_handle_t Kernel,
uint32_t Index, size_t Size,
void *ArgData);
+
+ol_impl_result_t olSetKernelArgsData_impl(ol_kernel_handle_t Kernel,
+ void *ArgsData, size_t ArgsDataSize);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 698b422fc38d0..a9656d4ee45d6 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -665,6 +665,20 @@ operator<<(std::ostream &os,
return os;
}
+// Print the parameters of an olSetKernelArgsData call as
+// ".Kernel = <ptr>, .ArgsData = <ptr>, .ArgsDataSize = <n>". Each field of
+// the params struct is a pointer to the argument and is dereferenced here.
+inline std::ostream &
+operator<<(std::ostream &os,
+ const struct ol_set_kernel_args_data_params_t *params) {
+ os << ".Kernel = ";
+ printPtr(os, *params->pKernel);
+ os << ", ";
+ os << ".ArgsData = ";
+ printPtr(os, *params->pArgsData);
+ os << ", ";
+ os << ".ArgsDataSize = ";
+ os << *params->pArgsDataSize;
+ return os;
+}
+
///////////////////////////////////////////////////////////////////////////////
// @brief Print pointer value
template <typename T>
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 1fbb424021eef..7d57c0696ad9e 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -54,40 +54,20 @@ struct ol_program_handle_t_ {
};
struct OffloadArguments {
- static constexpr size_t MaxParamBytes = 4000u;
+ static constexpr size_t MaxParamBytes = 4096u;
using args_t = std::array<char, MaxParamBytes>;
using args_size_t = std::vector<size_t>;
- using args_index_t = std::vector<void *>;
+ using args_ptr_t = std::vector<void *>;
args_t Storage;
args_size_t ParamSizes;
- args_index_t Indices;
- args_size_t OffsetPerIndex;
-
- std::uint32_t ImplicitOffsetArgs[3] = {0, 0, 0};
-
- // NOTE:
- // This implementation is an exact copy of the CUDA adapter's argument
- // implementation. Even though it was designed for CUDA, the design of
- // libomptarget means it should work for other plugins as they will expect
- // the same argument layout.
- OffloadArguments() {
- // Place the implicit offset index at the end of the indicies collection
- Indices.emplace_back(&ImplicitOffsetArgs);
- }
-
- /// Add an argument to the kernel.
- /// If the argument existed before, it is replaced.
- /// Otherwise, it is added.
- /// Gaps are filled with empty arguments.
- /// Implicit offset argument is kept at the back of the indices collection.
- void addArg(size_t Index, size_t Size, const void *Arg,
- size_t LocalSize = 0) {
- if (Index + 2 > Indices.size()) {
- // Move implicit offset argument index with the end
- Indices.resize(Index + 2, Indices.back());
- // Ensure enough space for the new argument
+ args_ptr_t Pointers;
+
+ // Add an argument. If it already exists, it is replaced. Gaps are filled with
+ // empty arguments. Previous setArgsData calls are invalidated.
+ void addArg(size_t Index, size_t Size, const void *Arg) {
+ if (Index + 1 > Pointers.size()) {
+ Pointers.resize(Index + 1);
ParamSizes.resize(Index + 1);
- OffsetPerIndex.resize(Index + 1);
}
ParamSizes[Index] = Size;
// calculate the insertion point on the array
@@ -95,46 +75,17 @@ struct OffloadArguments {
std::begin(ParamSizes) + Index, 0);
// Update the stored value for the argument
std::memcpy(&Storage[InsertPos], Arg, Size);
- Indices[Index] = &Storage[InsertPos];
- OffsetPerIndex[Index] = LocalSize;
+ Pointers[Index] = &Storage[InsertPos];
}
- void addLocalArg(size_t Index, size_t Size) {
- size_t LocalOffset = this->getLocalSize();
-
- // maximum required alignment is the size of the largest vector type
- const size_t MaxAlignment = sizeof(double) * 16;
-
- // for arguments smaller than the maximum alignment simply align to the
- // size of the argument
- const size_t Alignment = std::min(MaxAlignment, Size);
-
- // align the argument
- size_t AlignedLocalOffset = LocalOffset;
- size_t Pad = LocalOffset % Alignment;
- if (Pad != 0) {
- AlignedLocalOffset += Alignment - Pad;
- }
-
- addArg(Index, sizeof(size_t), (const void *)&(AlignedLocalOffset),
- Size + (AlignedLocalOffset - LocalOffset));
+  // Set all argument data at once. Previous addArg calls are invalidated.
+  // Storage holds at most MaxParamBytes bytes; passing more is a caller error
+  // (asserted) and the copy is truncated rather than written past the end of
+  // the buffer.
+  void setArgsData(const void *Data, size_t Size) {
+    assert(Size <= MaxParamBytes && "argument data exceeds argument storage");
+    const size_t CopySize = Size < MaxParamBytes ? Size : MaxParamBytes;
+    std::memcpy(Storage.data(), Data, CopySize);
+    Pointers.clear();
+    ParamSizes.clear();
+  }
- void setImplicitOffset(size_t Size, std::uint32_t *ImplicitOffset) {
- assert(Size == sizeof(std::uint32_t) * 3);
- std::memcpy(ImplicitOffsetArgs, ImplicitOffset, Size);
- }
-
- void clearLocalSize() {
- std::fill(std::begin(OffsetPerIndex), std::end(OffsetPerIndex), 0);
- }
-
- const args_index_t &getIndices() const noexcept { return Indices; }
-
- uint32_t getLocalSize() const {
- return std::accumulate(std::begin(OffsetPerIndex), std::end(OffsetPerIndex),
- 0);
- }
+ const args_ptr_t &getPointers() const noexcept { return Pointers; }
const char *getStorage() const noexcept { return Storage.data(); }
};
@@ -618,7 +569,7 @@ ol_impl_result_t olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue,
AsyncInfoWrapperTy AsyncInfoWrapper(DeviceImpl, Queue->AsyncInfo);
KernelArgsTy LaunchArgs{};
- LaunchArgs.NumArgs = Kernel->Args.getIndices().size() - 1; // TODO
+ LaunchArgs.NumArgs = Kernel->Args.getPointers().size();
LaunchArgs.NumTeams[0] = GlobalWorkSize[0];
LaunchArgs.NumTeams[1] = 1;
LaunchArgs.NumTeams[2] = 1;
@@ -628,7 +579,7 @@ ol_impl_result_t olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue,
LaunchArgs.ArgPtrs = (void **)Kernel->Args.getStorage();
- // TODO: Verify this
+ // No offsets needed, arguments are real pointers
auto ArgOffsets = std::vector<ptrdiff_t>(LaunchArgs.NumArgs, 0ul);
auto Err = Kernel->KernelImpl->launch(
@@ -646,3 +597,9 @@ ol_impl_result_t olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue,
return OL_SUCCESS;
}
+
+/// Replace the kernel's entire argument buffer with \p ArgsDataSize bytes
+/// copied from \p ArgsData.
+///
+/// \returns OL_ERRC_INVALID_VALUE if the data does not fit in the kernel's
+/// fixed-size argument storage, OL_SUCCESS otherwise.
+ol_impl_result_t olSetKernelArgsData_impl(ol_kernel_handle_t Kernel,
+                                          void *ArgsData, size_t ArgsDataSize) {
+  // The backing storage is a fixed-size array; refuse data that cannot fit
+  // instead of letting the copy run past its end.
+  if (ArgsDataSize > OffloadArguments::MaxParamBytes)
+    return OL_ERRC_INVALID_VALUE;
+
+  Kernel->Args.setArgsData(ArgsData, ArgsDataSize);
+  return OL_SUCCESS;
+}
>From df9eb3e97edc8504c86e3940bb0f1f3065a9658e Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Tue, 4 Feb 2025 17:23:06 +0000
Subject: [PATCH 06/16] Update Offload unit tests
---
offload/unittests/OffloadAPI/CMakeLists.txt | 8 ++++
.../unittests/OffloadAPI/common/Fixtures.hpp | 18 ++++++-
.../OffloadAPI/enqueue/olEnqueueDataCopy.cpp | 36 ++++++++++++++
.../OffloadAPI/enqueue/olEnqueueDataRead.cpp | 29 ++++++++++++
.../OffloadAPI/enqueue/olEnqueueDataWrite.cpp | 26 ++++++++++
.../OffloadAPI/memory/olMemAlloc.cpp | 45 ++++++++++++++++++
.../unittests/OffloadAPI/memory/olMemFree.cpp | 47 +++++++++++++++++++
.../OffloadAPI/platform/olPlatformInfo.hpp | 1 +
.../OffloadAPI/queue/olCreateQueue.cpp | 9 ++++
.../OffloadAPI/queue/olFinishQueue.cpp | 17 +++++++
.../OffloadAPI/queue/olReleaseQueue.cpp | 21 +++++++++
.../OffloadAPI/queue/olRetainQueue.cpp | 20 ++++++++
12 files changed, 276 insertions(+), 1 deletion(-)
create mode 100644 offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
create mode 100644 offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
create mode 100644 offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
create mode 100644 offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
create mode 100644 offload/unittests/OffloadAPI/memory/olMemFree.cpp
create mode 100644 offload/unittests/OffloadAPI/queue/olFinishQueue.cpp
create mode 100644 offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp
create mode 100644 offload/unittests/OffloadAPI/queue/olRetainQueue.cpp
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index e0d790684898d..c7f28d147db14 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -12,6 +12,14 @@ add_libompt_unittest("offload.unittests"
${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfo.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfoSize.cpp
${CMAKE_CURRENT_SOURCE_DIR}/queue/olCreateQueue.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/queue/olFinishQueue.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/queue/olReleaseQueue.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/queue/olRetainQueue.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemAlloc.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/memory/olMemFree.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueDataWrite.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueDataRead.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueDataCopy.cpp
)
add_dependencies("offload.unittests" ${PLUGINS_TEST_COMMON})
target_link_libraries("offload.unittests" PRIVATE ${PLUGINS_TEST_COMMON})
diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp
index 410a435dee1b5..2b85137a77d82 100644
--- a/offload/unittests/OffloadAPI/common/Fixtures.hpp
+++ b/offload/unittests/OffloadAPI/common/Fixtures.hpp
@@ -60,5 +60,21 @@ struct offloadDeviceTest : offloadPlatformTest {
ASSERT_SUCCESS(olGetDevice(Platform, 1, &Device));
}
- ol_device_handle_t Device;
+ ol_device_handle_t Device = nullptr;
+};
+
+// Fixture that extends offloadDeviceTest with a queue created on the device
+// in SetUp and released in TearDown.
+struct offloadQueueTest : offloadDeviceTest {
+  ol_queue_handle_t Queue = nullptr;
+
+  void SetUp() override {
+    RETURN_ON_FATAL_FAILURE(offloadDeviceTest::SetUp());
+    ASSERT_SUCCESS(olCreateQueue(Device, &Queue));
+  }
+
+  void TearDown() override {
+    // Queue stays null if SetUp failed before the queue was created.
+    if (Queue != nullptr)
+      olReleaseQueue(Queue);
+    RETURN_ON_FATAL_FAILURE(offloadDeviceTest::TearDown());
+  }
+};
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
new file mode 100644
index 0000000000000..afc5866821e36
--- /dev/null
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
@@ -0,0 +1,36 @@
+//===------- Offload API tests - olEnqueueDataCopy ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olEnqueueDataCopyTest = offloadQueueTest;
+
+TEST_F(olEnqueueDataCopyTest, Success) {
+ constexpr size_t Size = 1024;
+ void *AllocA;
+ void *AllocB;
+ std::vector<uint8_t> Input(Size, 42);
+ std::vector<uint8_t> Output(Size, 0);
+
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, 0, &AllocA));
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, 0, &AllocB));
+ ASSERT_SUCCESS(
+ olEnqueueDataWrite(Queue, Input.data(), AllocA, Size, nullptr));
+ ASSERT_SUCCESS(
+ olEnqueueDataCopy(Queue, AllocA, AllocB, Device, Size, nullptr));
+ ASSERT_SUCCESS(
+ olEnqueueDataRead(Queue, AllocB, Output.data(), Size, nullptr));
+ ASSERT_SUCCESS(olFinishQueue(Queue));
+ for (uint8_t Val : Output) {
+ ASSERT_EQ(Val, 42);
+ }
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, AllocA));
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, AllocB));
+}
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
new file mode 100644
index 0000000000000..76d3490cc8737
--- /dev/null
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
@@ -0,0 +1,29 @@
+//===------- Offload API tests - olEnqueueDataRead ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olEnqueueDataReadTest = offloadQueueTest;
+
+TEST_F(olEnqueueDataReadTest, Success) {
+ constexpr size_t Size = 1024;
+ void *Alloc;
+ std::vector<uint8_t> Input(Size, 42);
+ std::vector<uint8_t> Output(Size, 0);
+
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, 0, &Alloc));
+ ASSERT_SUCCESS(olEnqueueDataWrite(Queue, Input.data(), Alloc, Size, nullptr));
+ ASSERT_SUCCESS(olEnqueueDataRead(Queue, Alloc, Output.data(), Size, nullptr));
+ ASSERT_SUCCESS(olFinishQueue(Queue));
+ for (uint8_t Val : Output) {
+ ASSERT_EQ(Val, 42);
+ }
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
new file mode 100644
index 0000000000000..ad66887643d56
--- /dev/null
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
@@ -0,0 +1,26 @@
+//===------- Offload API tests - olEnqueueDataWrite -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olEnqueueDataWriteTest = offloadQueueTest;
+
+TEST_F(olEnqueueDataWriteTest, Success) {
+ constexpr size_t Size = 1024;
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, 0, &Alloc));
+ std::vector<uint8_t> Input(Size, 42);
+ ASSERT_SUCCESS(
+ olEnqueueDataWrite(Queue, Input.data(), Alloc, Size, nullptr));
+ ASSERT_SUCCESS(olFinishQueue(Queue)); // check the queue finishes cleanly
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
+
+
diff --git a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
new file mode 100644
index 0000000000000..e951231d4a0e9
--- /dev/null
+++ b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
@@ -0,0 +1,45 @@
+//===------- Offload API tests - olMemAlloc -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olMemAllocTest = offloadDeviceTest;
+
+TEST_F(olMemAllocTest, SuccessAllocShared) {
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 1024, 0, &Alloc));
+ ASSERT_NE(Alloc, nullptr);
+ olMemFree(Device, OL_ALLOC_TYPE_SHARED, Alloc);
+}
+
+TEST_F(olMemAllocTest, SuccessAllocHost) {
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, 0, &Alloc));
+ ASSERT_NE(Alloc, nullptr);
+ olMemFree(Device, OL_ALLOC_TYPE_HOST, Alloc);
+}
+
+TEST_F(olMemAllocTest, SuccessAllocDevice) {
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, 0, &Alloc));
+ ASSERT_NE(Alloc, nullptr);
+ olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc);
+}
+
+TEST_F(olMemAllocTest, InvalidNullDevice) {
+ void *Alloc = nullptr;
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
+ olMemAlloc(nullptr, OL_ALLOC_TYPE_DEVICE, 1024, 0, &Alloc));
+}
+
+TEST_F(olMemAllocTest, InvalidNullOutPtr) {
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
+ olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, 0, nullptr));
+}
diff --git a/offload/unittests/OffloadAPI/memory/olMemFree.cpp b/offload/unittests/OffloadAPI/memory/olMemFree.cpp
new file mode 100644
index 0000000000000..54e8a24f9fbba
--- /dev/null
+++ b/offload/unittests/OffloadAPI/memory/olMemFree.cpp
@@ -0,0 +1,47 @@
+//===------- Offload API tests - olMemFree --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olMemFreeTest = offloadDeviceTest;
+
+TEST_F(olMemFreeTest, SuccessFreeShared) {
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 1024, 0, &Alloc));
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_SHARED, Alloc));
+}
+
+TEST_F(olMemFreeTest, SuccessFreeHost) {
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, 0, &Alloc));
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_HOST, Alloc));
+}
+
+TEST_F(olMemFreeTest, SuccessFreeDevice) {
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, 0, &Alloc));
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
+
+TEST_F(olMemFreeTest, InvalidNullDevice) {
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, 0, &Alloc));
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, // null device, valid address
+ olMemFree(nullptr, OL_ALLOC_TYPE_DEVICE, Alloc));
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
+
+TEST_F(olMemFreeTest, InvalidNullPtr) {
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, 0, &Alloc));
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, // valid device, null address
+ olMemFree(Device, OL_ALLOC_TYPE_DEVICE, nullptr));
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
diff --git a/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp b/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp
index d49cdb90d321a..f61bca0cf52f0 100644
--- a/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp
+++ b/offload/unittests/OffloadAPI/platform/olPlatformInfo.hpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#pragma once
+#include <unordered_map>
#include <vector>
// TODO: We could autogenerate these
diff --git a/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp b/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp
index f542dac4bb2d8..0e19f03c11776 100644
--- a/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp
+++ b/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp
@@ -17,3 +17,12 @@ TEST_F(olCreateQueueTest, Success) {
ASSERT_SUCCESS(olCreateQueue(Device, &Queue));
ASSERT_NE(Queue, nullptr);
}
+
+TEST_F(olCreateQueueTest, InvalidNullHandleDevice) {
+ ol_queue_handle_t Queue = nullptr;
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olCreateQueue(nullptr, &Queue));
+}
+
+TEST_F(olCreateQueueTest, InvalidNullPointerQueue) {
+ ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olCreateQueue(Device, nullptr));
+}
diff --git a/offload/unittests/OffloadAPI/queue/olFinishQueue.cpp b/offload/unittests/OffloadAPI/queue/olFinishQueue.cpp
new file mode 100644
index 0000000000000..7c7d3553083fb
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olFinishQueue.cpp
@@ -0,0 +1,17 @@
+//===------- Offload API tests - olFinishQueue ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olFinishQueueTest = offloadQueueTest;
+
+TEST_F(olFinishQueueTest, SuccessEmptyQueue) {
+ ASSERT_SUCCESS(olFinishQueue(Queue));
+}
diff --git a/offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp b/offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp
new file mode 100644
index 0000000000000..392f49bc2f80d
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olReleaseQueue.cpp
@@ -0,0 +1,21 @@
+//===------- Offload API tests - olReleaseQueue ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olReleaseQueueTest = offloadQueueTest;
+
+// TODO: When we can fetch queue info we can check the reference count is
+// changing in an expected way. In the meantime just check the entry point
+// doesn't blow up.
+TEST_F(olReleaseQueueTest, Success) {
+ ASSERT_SUCCESS(olRetainQueue(Queue));
+ ASSERT_SUCCESS(olReleaseQueue(Queue));
+}
diff --git a/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp b/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp
new file mode 100644
index 0000000000000..9e499d849c742
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp
@@ -0,0 +1,20 @@
+//===------- Offload API tests - olRetainQueue ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olRetainQueueTest = offloadQueueTest;
+
+// TODO: When we can fetch queue info we can check the reference count is
+// changing in the expected way. In the meantime just check the entry point
+// doesn't blow up.
+TEST_F(olRetainQueueTest, Success) {
+ ASSERT_SUCCESS(olRetainQueue(Queue));
+}
>From 71326aedc8a5748b15652054a42e61a173501d21 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Wed, 5 Feb 2025 11:53:19 +0000
Subject: [PATCH 07/16] Kernel launch size arguments
---
offload/liboffload/API/Enqueue.td | 16 +++++-
.../liboffload/include/generated/OffloadAPI.h | 24 ++++++--
.../include/generated/OffloadEntryPoints.inc | 34 ++++++------
.../generated/OffloadImplFuncDecls.inc | 8 +--
.../include/generated/OffloadPrint.hpp | 55 ++++++++++++++++++-
offload/liboffload/src/OffloadImpl.cpp | 20 +++----
offload/tools/offload-tblgen/PrintGen.cpp | 36 +++++++++++-
offload/tools/offload-tblgen/RecordTypes.hpp | 2 +
8 files changed, 153 insertions(+), 42 deletions(-)
diff --git a/offload/liboffload/API/Enqueue.td b/offload/liboffload/API/Enqueue.td
index 621eb3a2f410e..d9215e8175ef8 100644
--- a/offload/liboffload/API/Enqueue.td
+++ b/offload/liboffload/API/Enqueue.td
@@ -54,6 +54,20 @@ def : Function {
}
+def : Struct {
+ let name = "ol_kernel_launch_size_args_t";
+ let desc = "Size-related arguments for a kernel launch.";
+ let members = [
+ StructMember<"size_t", "Dimensions", "Number of work dimensions">,
+ StructMember<"size_t", "NumGroupsX", "Number of work groups on the X dimension">,
+ StructMember<"size_t", "NumGroupsY", "Number of work groups on the Y dimension">,
+ StructMember<"size_t", "NumGroupsZ", "Number of work groups on the Z dimension">,
+ StructMember<"size_t", "GroupSizeX", "Size of a work group on the X dimension.">,
+ StructMember<"size_t", "GroupSizeY", "Size of a work group on the Y dimension.">,
+ StructMember<"size_t", "GroupSizeZ", "Size of a work group on the Z dimension.">
+ ];
+}
+
def : Function {
let name = "olEnqueueKernelLaunch";
let desc = "Enqueue a kernel launch with the specified size and parameters";
@@ -61,7 +75,7 @@ def : Function {
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
- Param<"const size_t*", "GlobalWorkSize", "an array of size 3 representing the global work size", PARAM_IN>,
+ Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>,
Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
];
let returns = [];
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 155e31338c88b..4f4ff51bef5d0 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -735,6 +735,18 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopy(
// [out][optional] optional recorded event for the enqueued operation
ol_event_handle_t *EventOut);
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Size-related arguments for a kernel launch.
+typedef struct ol_kernel_launch_size_args_t {
+ size_t Dimensions; /// Number of work dimensions
+ size_t NumGroupsX; /// Number of work groups on the X dimension
+ size_t NumGroupsY; /// Number of work groups on the Y dimension
+ size_t NumGroupsZ; /// Number of work groups on the Z dimension
+ size_t GroupSizeX; /// Size of a work group on the X dimension.
+ size_t GroupSizeY; /// Size of a work group on the Y dimension.
+ size_t GroupSizeZ; /// Size of a work group on the Z dimension.
+} ol_kernel_launch_size_args_t;
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Enqueue a kernel launch with the specified size and parameters
///
@@ -748,14 +760,14 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopy(
/// + `NULL == Queue`
/// + `NULL == Kernel`
/// - ::OL_ERRC_INVALID_NULL_POINTER
-/// + `NULL == GlobalWorkSize`
+/// + `NULL == LaunchSizeArgs`
OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
// [in] handle of the queue
ol_queue_handle_t Queue,
// [in] handle of the kernel
ol_kernel_handle_t Kernel,
- // [in] an array of size 3 representing the global work size
- const size_t *GlobalWorkSize,
+ // [in] pointer to the struct containing launch size parameters
+ const ol_kernel_launch_size_args_t *LaunchSizeArgs,
// [out][optional] optional recorded event for the enqueued operation
ol_event_handle_t *EventOut);
@@ -1099,7 +1111,7 @@ typedef struct ol_enqueue_data_copy_params_t {
typedef struct ol_enqueue_kernel_launch_params_t {
ol_queue_handle_t *pQueue;
ol_kernel_handle_t *pKernel;
- const size_t **pGlobalWorkSize;
+ const ol_kernel_launch_size_args_t **pLaunchSizeArgs;
ol_event_handle_t **pEventOut;
} ol_enqueue_kernel_launch_params_t;
@@ -1339,8 +1351,8 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopyWithCodeLoc(
/// @details See also ::olEnqueueKernelLaunch
OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunchWithCodeLoc(
ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
- const size_t *GlobalWorkSize, ol_event_handle_t *EventOut,
- ol_code_location_t *CodeLocation);
+ const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+ ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
///////////////////////////////////////////////////////////////////////////////
/// @brief Variant of olCreateProgram that also sets source code location
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index fd022795a5d40..57cf4d64744cc 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -964,10 +964,10 @@ ol_result_t olEnqueueDataCopyWithCodeLoc(ol_queue_handle_t Queue, void *SrcPtr,
}
///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olEnqueueKernelLaunch_val(ol_queue_handle_t Queue,
- ol_kernel_handle_t Kernel,
- const size_t *GlobalWorkSize,
- ol_event_handle_t *EventOut) {
+ol_impl_result_t
+olEnqueueKernelLaunch_val(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+ const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+ ol_event_handle_t *EventOut) {
if (true /*enableParameterValidation*/) {
if (NULL == Queue) {
return OL_ERRC_INVALID_NULL_HANDLE;
@@ -977,26 +977,27 @@ ol_impl_result_t olEnqueueKernelLaunch_val(ol_queue_handle_t Queue,
return OL_ERRC_INVALID_NULL_HANDLE;
}
- if (NULL == GlobalWorkSize) {
+ if (NULL == LaunchSizeArgs) {
return OL_ERRC_INVALID_NULL_POINTER;
}
}
- return olEnqueueKernelLaunch_impl(Queue, Kernel, GlobalWorkSize, EventOut);
+ return olEnqueueKernelLaunch_impl(Queue, Kernel, LaunchSizeArgs, EventOut);
}
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
- ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
- const size_t *GlobalWorkSize, ol_event_handle_t *EventOut) {
+OL_APIEXPORT ol_result_t OL_APICALL
+olEnqueueKernelLaunch(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+ const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+ ol_event_handle_t *EventOut) {
if (offloadConfig().TracingEnabled) {
std::cout << "---> olEnqueueKernelLaunch";
}
ol_result_t Result =
- olEnqueueKernelLaunch_val(Queue, Kernel, GlobalWorkSize, EventOut);
+ olEnqueueKernelLaunch_val(Queue, Kernel, LaunchSizeArgs, EventOut);
if (offloadConfig().TracingEnabled) {
ol_enqueue_kernel_launch_params_t Params = {&Queue, &Kernel,
- &GlobalWorkSize, &EventOut};
+ &LaunchSizeArgs, &EventOut};
std::cout << "(" << &Params << ")";
std::cout << "-> " << Result << "\n";
if (Result && Result->Details) {
@@ -1005,14 +1006,13 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
}
return Result;
}
-ol_result_t olEnqueueKernelLaunchWithCodeLoc(ol_queue_handle_t Queue,
- ol_kernel_handle_t Kernel,
- const size_t *GlobalWorkSize,
- ol_event_handle_t *EventOut,
- ol_code_location_t *CodeLocation) {
+ol_result_t olEnqueueKernelLaunchWithCodeLoc(
+ ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+ const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+ ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation) {
currentCodeLocation() = CodeLocation;
ol_result_t Result =
- olEnqueueKernelLaunch(Queue, Kernel, GlobalWorkSize, EventOut);
+ olEnqueueKernelLaunch(Queue, Kernel, LaunchSizeArgs, EventOut);
currentCodeLocation() = nullptr;
return Result;
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 9401b20f97c11..e7204e594973e 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -73,10 +73,10 @@ ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue, void *SrcPtr,
size_t Size,
ol_event_handle_t *EventOut);
-ol_impl_result_t olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue,
- ol_kernel_handle_t Kernel,
- const size_t *GlobalWorkSize,
- ol_event_handle_t *EventOut);
+ol_impl_result_t
+olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+ const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+ ol_event_handle_t *EventOut);
ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
size_t ProgDataSize,
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index a9656d4ee45d6..157bee0cd07a8 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -309,6 +309,57 @@ inline std::ostream &operator<<(std::ostream &os,
}
return os;
}
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print operator for the ol_code_location_t type
+/// @returns std::ostream &
+
+inline std::ostream &operator<<(std::ostream &os,
+ const struct ol_code_location_t params) {
+ os << "(struct ol_code_location_t){";
+ os << ".FunctionName = ";
+ printPtr(os, params.FunctionName);
+ os << ", ";
+ os << ".SourceFile = ";
+ printPtr(os, params.SourceFile);
+ os << ", ";
+ os << ".LineNumber = ";
+ os << params.LineNumber;
+ os << ", ";
+ os << ".ColumnNumber = ";
+ os << params.ColumnNumber;
+ os << "}";
+ return os;
+}
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print operator for the ol_kernel_launch_size_args_t type
+/// @returns std::ostream &
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_kernel_launch_size_args_t params) {
+ os << "(struct ol_kernel_launch_size_args_t){";
+ os << ".Dimensions = ";
+ os << params.Dimensions;
+ os << ", ";
+ os << ".NumGroupsX = ";
+ os << params.NumGroupsX;
+ os << ", ";
+ os << ".NumGroupsY = ";
+ os << params.NumGroupsY;
+ os << ", ";
+ os << ".NumGroupsZ = ";
+ os << params.NumGroupsZ;
+ os << ", ";
+ os << ".GroupSizeX = ";
+ os << params.GroupSizeX;
+ os << ", ";
+ os << ".GroupSizeY = ";
+ os << params.GroupSizeY;
+ os << ", ";
+ os << ".GroupSizeZ = ";
+ os << params.GroupSizeZ;
+ os << "}";
+ return os;
+}
inline std::ostream &operator<<(std::ostream &os,
const struct ol_get_platform_params_t *params) {
@@ -583,8 +634,8 @@ operator<<(std::ostream &os,
os << ".Kernel = ";
printPtr(os, *params->pKernel);
os << ", ";
- os << ".GlobalWorkSize = ";
- printPtr(os, *params->pGlobalWorkSize);
+ os << ".LaunchSizeArgs = ";
+ printPtr(os, *params->pLaunchSizeArgs);
os << ", ";
os << ".EventOut = ";
printPtr(os, *params->pEventOut);
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 7d57c0696ad9e..a0057879b2bbe 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -560,22 +560,22 @@ ol_impl_result_t olSetKernelArgValue_impl(ol_kernel_handle_t Kernel,
return OL_SUCCESS;
}
-ol_impl_result_t olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue,
- ol_kernel_handle_t Kernel,
- const size_t *GlobalWorkSize,
- ol_event_handle_t *EventOut) {
+ol_impl_result_t
+olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+ const ol_kernel_launch_size_args_t *LaunchSizeArgs,
+ ol_event_handle_t *EventOut) {
auto &DeviceImpl = Queue->Device->Device;
AsyncInfoWrapperTy AsyncInfoWrapper(DeviceImpl, Queue->AsyncInfo);
KernelArgsTy LaunchArgs{};
LaunchArgs.NumArgs = Kernel->Args.getPointers().size();
- LaunchArgs.NumTeams[0] = GlobalWorkSize[0];
- LaunchArgs.NumTeams[1] = 1;
- LaunchArgs.NumTeams[2] = 1;
- LaunchArgs.ThreadLimit[0] = 1;
- LaunchArgs.ThreadLimit[1] = 1;
- LaunchArgs.ThreadLimit[2] = 1;
+ LaunchArgs.NumTeams[0] = LaunchSizeArgs->NumGroupsX;
+ LaunchArgs.NumTeams[1] = LaunchSizeArgs->NumGroupsY;
+ LaunchArgs.NumTeams[2] = LaunchSizeArgs->NumGroupsZ;
+ LaunchArgs.ThreadLimit[0] = LaunchSizeArgs->GroupSizeX;
+ LaunchArgs.ThreadLimit[1] = LaunchSizeArgs->GroupSizeY;
+ LaunchArgs.ThreadLimit[2] = LaunchSizeArgs->GroupSizeZ;
LaunchArgs.ArgPtrs = (void **)Kernel->Args.getStorage();
diff --git a/offload/tools/offload-tblgen/PrintGen.cpp b/offload/tools/offload-tblgen/PrintGen.cpp
index 2a7c63c3dfd1f..d7a63b68451b0 100644
--- a/offload/tools/offload-tblgen/PrintGen.cpp
+++ b/offload/tools/offload-tblgen/PrintGen.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
using namespace offload::tblgen;
-constexpr auto PrintEnumHeader =
+constexpr auto PrintTypeHeader =
R"(///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the {0} type
/// @returns std::ostream &
@@ -33,7 +33,7 @@ constexpr auto PrintTaggedEnumHeader =
)";
static void ProcessEnum(const EnumRec &Enum, raw_ostream &OS) {
- OS << formatv(PrintEnumHeader, Enum.getName());
+ OS << formatv(PrintTypeHeader, Enum.getName());
OS << formatv(
"inline std::ostream &operator<<(std::ostream &os, enum {0} value) "
"{{\n" TAB_1 "switch (value) {{\n",
@@ -150,6 +150,33 @@ inline std::ostream &operator<<(std::ostream &os, const struct {0} *params) {{
OS << TAB_1 "return os;\n}\n";
}
+
+// Emit an std::ostream operator<< for Struct that prints every member.
+static void ProcessStruct(const StructRec &Struct, raw_ostream &OS) {
+ if (Struct.getName() == "ol_error_struct_t")
+ return; // No struct print operator is emitted for the error struct.
+ OS << formatv(PrintTypeHeader, Struct.getName());
+ OS << formatv(R"(
+inline std::ostream &operator<<(std::ostream &os, const struct {0} params) {{
+)",
+ Struct.getName());
+ OS << formatv(TAB_1 "os << \"(struct {0}){{\";\n", Struct.getName());
+ const auto &Members = Struct.getMembers();
+ for (size_t I = 0, E = Members.size(); I != E; ++I) {
+ const auto &Member = Members[I];
+ OS << formatv(TAB_1 "os << \".{0} = \";\n", Member.getName());
+ if (Member.isPointerType() || Member.isHandleType()) // print as address
+ OS << formatv(TAB_1 "printPtr(os, params.{0});\n", Member.getName());
+ else
+ OS << formatv(TAB_1 "os << params.{0};\n", Member.getName());
+ if (I + 1 != E) // separator after every member except the last
+ OS << TAB_1 "os << \", \";\n";
+ }
+ OS << TAB_1 "os << \"}\";\n";
+ OS << TAB_1 "return os;\n";
+ OS << "}\n";
+}
+
void EmitOffloadPrintHeader(const RecordKeeper &Records, raw_ostream &OS) {
OS << GenericHeader;
OS << R"""(
@@ -193,6 +220,11 @@ template <typename T> inline void printTagged(std::ostream &os, const void *ptr,
}
EmitResultPrint(OS);
+ for (auto *R : Records.getAllDerivedDefinitions("Struct")) {
+ StructRec S{R};
+ ProcessStruct(S, OS);
+ }
+
// Emit print functions for the function param structs
for (auto *R : Records.getAllDerivedDefinitions("Function")) {
EmitFunctionParamStructPrint(FunctionRec{R}, OS);
diff --git a/offload/tools/offload-tblgen/RecordTypes.hpp b/offload/tools/offload-tblgen/RecordTypes.hpp
index 0bf3256c525d9..9faf361f4dd76 100644
--- a/offload/tools/offload-tblgen/RecordTypes.hpp
+++ b/offload/tools/offload-tblgen/RecordTypes.hpp
@@ -103,6 +103,8 @@ class StructMemberRec {
StringRef getType() const { return rec->getValueAsString("type"); }
StringRef getName() const { return rec->getValueAsString("name"); }
StringRef getDesc() const { return rec->getValueAsString("desc"); }
+ bool isPointerType() const { return getType().ends_with('*'); }
+ bool isHandleType() const { return getType().ends_with("_handle_t"); }
private:
const Record *rec;
>From 81bd64629136e444469c157ea79b2c2b248916f8 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Wed, 5 Feb 2025 12:14:49 +0000
Subject: [PATCH 08/16] Remove currently unused alignment param
---
offload/liboffload/API/Memory.td | 1 -
.../liboffload/include/generated/OffloadAPI.h | 5 +----
.../include/generated/OffloadEntryPoints.inc | 17 +++++++----------
.../include/generated/OffloadImplFuncDecls.inc | 2 +-
.../include/generated/OffloadPrint.hpp | 3 ---
offload/liboffload/src/OffloadImpl.cpp | 4 ++--
.../OffloadAPI/enqueue/olEnqueueDataCopy.cpp | 4 ++--
.../OffloadAPI/enqueue/olEnqueueDataRead.cpp | 2 +-
.../OffloadAPI/enqueue/olEnqueueDataWrite.cpp | 17 +++++++----------
.../unittests/OffloadAPI/memory/olMemAlloc.cpp | 16 ++++++++--------
.../unittests/OffloadAPI/memory/olMemFree.cpp | 14 +++++++-------
11 files changed, 36 insertions(+), 49 deletions(-)
diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td
index c15ae6f6d21ca..2c3f4c83980d0 100644
--- a/offload/liboffload/API/Memory.td
+++ b/offload/liboffload/API/Memory.td
@@ -27,7 +27,6 @@ def : Function {
Param<"ol_device_handle_t", "Device", "handle of the device to allocate on", PARAM_IN>,
Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>,
Param<"size_t", "Size", "size of the allocation in bytes", PARAM_IN>,
- Param<"size_t", "Aligment", "alignment of the allocation in bytes", PARAM_IN>,
Param<"void**", "AllocationOut", "output for the allocated pointer", PARAM_OUT>
];
let returns = [
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 4f4ff51bef5d0..950c0e37ae67c 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -513,8 +513,6 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemAlloc(
ol_alloc_type_t Type,
// [in] size of the allocation in bytes
size_t Size,
- // [in] alignment of the allocation in bytes
- size_t Aligment,
// [out] output for the allocated pointer
void **AllocationOut);
@@ -1008,7 +1006,6 @@ typedef struct ol_mem_alloc_params_t {
ol_device_handle_t *pDevice;
ol_alloc_type_t *pType;
size_t *pSize;
- size_t *pAligment;
void ***pAllocationOut;
} ol_mem_alloc_params_t;
@@ -1261,7 +1258,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSizeWithCodeLoc(
/// @details See also ::olMemAlloc
OL_APIEXPORT ol_result_t OL_APICALL olMemAllocWithCodeLoc(
ol_device_handle_t Device, ol_alloc_type_t Type, size_t Size,
- size_t Aligment, void **AllocationOut, ol_code_location_t *CodeLocation);
+ void **AllocationOut, ol_code_location_t *CodeLocation);
///////////////////////////////////////////////////////////////////////////////
/// @brief Variant of olMemFree that also sets source code location information
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 57cf4d64744cc..6d8f1d7c7171f 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -442,8 +442,7 @@ ol_result_t olGetDeviceInfoSizeWithCodeLoc(ol_device_handle_t Device,
///////////////////////////////////////////////////////////////////////////////
ol_impl_result_t olMemAlloc_val(ol_device_handle_t Device, ol_alloc_type_t Type,
- size_t Size, size_t Aligment,
- void **AllocationOut) {
+ size_t Size, void **AllocationOut) {
if (true /*enableParameterValidation*/) {
if (Size == 0) {
return OL_ERRC_INVALID_SIZE;
@@ -458,22 +457,20 @@ ol_impl_result_t olMemAlloc_val(ol_device_handle_t Device, ol_alloc_type_t Type,
}
}
- return olMemAlloc_impl(Device, Type, Size, Aligment, AllocationOut);
+ return olMemAlloc_impl(Device, Type, Size, AllocationOut);
}
OL_APIEXPORT ol_result_t OL_APICALL olMemAlloc(ol_device_handle_t Device,
ol_alloc_type_t Type,
- size_t Size, size_t Aligment,
+ size_t Size,
void **AllocationOut) {
if (offloadConfig().TracingEnabled) {
std::cout << "---> olMemAlloc";
}
- ol_result_t Result =
- olMemAlloc_val(Device, Type, Size, Aligment, AllocationOut);
+ ol_result_t Result = olMemAlloc_val(Device, Type, Size, AllocationOut);
if (offloadConfig().TracingEnabled) {
- ol_mem_alloc_params_t Params = {&Device, &Type, &Size, &Aligment,
- &AllocationOut};
+ ol_mem_alloc_params_t Params = {&Device, &Type, &Size, &AllocationOut};
std::cout << "(" << &Params << ")";
std::cout << "-> " << Result << "\n";
if (Result && Result->Details) {
@@ -484,10 +481,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemAlloc(ol_device_handle_t Device,
}
ol_result_t olMemAllocWithCodeLoc(ol_device_handle_t Device,
ol_alloc_type_t Type, size_t Size,
- size_t Aligment, void **AllocationOut,
+ void **AllocationOut,
ol_code_location_t *CodeLocation) {
currentCodeLocation() = CodeLocation;
- ol_result_t Result = olMemAlloc(Device, Type, Size, Aligment, AllocationOut);
+ ol_result_t Result = olMemAlloc(Device, Type, Size, AllocationOut);
currentCodeLocation() = nullptr;
return Result;
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index e7204e594973e..e7179e44fc9ec 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -39,7 +39,7 @@ ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
ol_alloc_type_t Type, size_t Size,
- size_t Aligment, void **AllocationOut);
+ void **AllocationOut);
ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
void *Address);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 157bee0cd07a8..5271832451dd6 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -489,9 +489,6 @@ inline std::ostream &operator<<(std::ostream &os,
os << ".Size = ";
os << *params->pSize;
os << ", ";
- os << ".Aligment = ";
- os << *params->pAligment;
- os << ", ";
os << ".AllocationOut = ";
printPtr(os, *params->pAllocationOut);
return os;
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index a0057879b2bbe..a2d3730d3b303 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -322,7 +322,7 @@ TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) {
}
ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
- ol_alloc_type_t Type, size_t Size, size_t,
+ ol_alloc_type_t Type, size_t Size,
void **AllocationOut) {
auto Alloc =
Device->Device.dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
@@ -485,7 +485,7 @@ ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
ol_program_handle_t *Program) {
auto ImageData = MemoryBuffer::getMemBufferCopy(
StringRef(reinterpret_cast<char *>(ProgData), ProgDataSize));
- __tgt_device_image DeviceImage{(char *) ImageData->getBuffer().data(),
+ __tgt_device_image DeviceImage{(char *)ImageData->getBuffer().data(),
((char *)ImageData->getBuffer().data()) +
ProgDataSize - 1,
nullptr, nullptr};
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
index afc5866821e36..d15e738bc94e6 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
@@ -19,8 +19,8 @@ TEST_F(olEnqueueDataCopyTest, Success) {
std::vector<uint8_t> Input(Size, 42);
std::vector<uint8_t> Output(Size, 0);
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, 0, &AllocA));
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, 0, &AllocB));
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocA));
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocB));
ASSERT_SUCCESS(
olEnqueueDataWrite(Queue, Input.data(), AllocA, Size, nullptr));
ASSERT_SUCCESS(
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
index 76d3490cc8737..5787889c4febb 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
@@ -18,7 +18,7 @@ TEST_F(olEnqueueDataReadTest, Success) {
std::vector<uint8_t> Input(Size, 42);
std::vector<uint8_t> Output(Size, 0);
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, 0, &Alloc));
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
ASSERT_SUCCESS(olEnqueueDataWrite(Queue, Input.data(), Alloc, Size, nullptr));
ASSERT_SUCCESS(olEnqueueDataRead(Queue, Alloc, Output.data(), Size, nullptr));
ASSERT_SUCCESS(olFinishQueue(Queue));
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
index ad66887643d56..d3f3edf58a531 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
@@ -13,14 +13,11 @@
using olEnqueueDataWriteTest = offloadQueueTest;
TEST_F(olEnqueueDataWriteTest, Success) {
- constexpr size_t Size = 1024;
- void *Alloc;
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, 0, &Alloc));
- std::vector<uint8_t> Input(Size, 42);
- ASSERT_SUCCESS(
- olEnqueueDataWrite(Queue, Input.data(), Alloc, Size, nullptr));
- olFinishQueue(Queue);
- olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc);
+ constexpr size_t Size = 1024;
+ void *Alloc;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
+ std::vector<uint8_t> Input(Size, 42);
+ ASSERT_SUCCESS(olEnqueueDataWrite(Queue, Input.data(), Alloc, Size, nullptr));
+ olFinishQueue(Queue);
+ olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc);
}
-
-
diff --git a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
index e951231d4a0e9..47388801b2e58 100644
--- a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
+++ b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
@@ -14,21 +14,21 @@ using olMemAllocTest = offloadDeviceTest;
TEST_F(olMemAllocTest, SuccessAllocShared) {
void *Alloc = nullptr;
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 1024, 0, &Alloc));
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 1024, &Alloc));
ASSERT_NE(Alloc, nullptr);
olMemFree(Device, OL_ALLOC_TYPE_SHARED, Alloc);
}
TEST_F(olMemAllocTest, SuccessAllocHost) {
- void *Alloc = nullptr;
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, 0, &Alloc));
- ASSERT_NE(Alloc, nullptr);
- olMemFree(Device, OL_ALLOC_TYPE_HOST, Alloc);
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, &Alloc));
+ ASSERT_NE(Alloc, nullptr);
+ olMemFree(Device, OL_ALLOC_TYPE_HOST, Alloc);
}
TEST_F(olMemAllocTest, SuccessAllocDevice) {
void *Alloc = nullptr;
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, 0, &Alloc));
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
ASSERT_NE(Alloc, nullptr);
olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc);
}
@@ -36,10 +36,10 @@ TEST_F(olMemAllocTest, SuccessAllocDevice) {
TEST_F(olMemAllocTest, InvalidNullDevice) {
void *Alloc = nullptr;
ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
- olMemAlloc(nullptr, OL_ALLOC_TYPE_DEVICE, 1024, 0, &Alloc));
+ olMemAlloc(nullptr, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
}
TEST_F(olMemAllocTest, InvalidNullOutPtr) {
ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
- olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, 0, nullptr));
+ olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, nullptr));
}
diff --git a/offload/unittests/OffloadAPI/memory/olMemFree.cpp b/offload/unittests/OffloadAPI/memory/olMemFree.cpp
index 54e8a24f9fbba..647c81a4e9536 100644
--- a/offload/unittests/OffloadAPI/memory/olMemFree.cpp
+++ b/offload/unittests/OffloadAPI/memory/olMemFree.cpp
@@ -14,25 +14,25 @@ using olMemFreeTest = offloadDeviceTest;
TEST_F(olMemFreeTest, SuccessFreeShared) {
void *Alloc = nullptr;
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 1024, 0, &Alloc));
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_SHARED, 1024, &Alloc));
ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_SHARED, Alloc));
}
TEST_F(olMemFreeTest, SuccessFreeHost) {
- void *Alloc = nullptr;
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, 0, &Alloc));
- ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_HOST, Alloc));
+ void *Alloc = nullptr;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_HOST, 1024, &Alloc));
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_HOST, Alloc));
}
TEST_F(olMemFreeTest, SuccessFreeDevice) {
void *Alloc = nullptr;
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, 0, &Alloc));
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
}
TEST_F(olMemFreeTest, InvalidNullDevice) {
void *Alloc = nullptr;
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, 0, &Alloc));
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
olMemFree(nullptr, OL_ALLOC_TYPE_DEVICE, &Alloc));
ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
@@ -40,7 +40,7 @@ TEST_F(olMemFreeTest, InvalidNullDevice) {
TEST_F(olMemFreeTest, InvalidNullPtr) {
void *Alloc = nullptr;
- ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, 0, &Alloc));
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, 1024, &Alloc));
ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,
olMemFree(nullptr, OL_ALLOC_TYPE_DEVICE, &Alloc));
ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
>From 85391843fec4de6918022623f1a48c784f0f1106 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Wed, 5 Feb 2025 12:20:40 +0000
Subject: [PATCH 09/16] Fix formatting
---
offload/tools/offload-tblgen/PrintGen.cpp | 1 -
offload/unittests/OffloadAPI/queue/olRetainQueue.cpp | 4 +---
2 files changed, 1 insertion(+), 4 deletions(-)
diff --git a/offload/tools/offload-tblgen/PrintGen.cpp b/offload/tools/offload-tblgen/PrintGen.cpp
index d7a63b68451b0..43a9c8478e1ff 100644
--- a/offload/tools/offload-tblgen/PrintGen.cpp
+++ b/offload/tools/offload-tblgen/PrintGen.cpp
@@ -150,7 +150,6 @@ inline std::ostream &operator<<(std::ostream &os, const struct {0} *params) {{
OS << TAB_1 "return os;\n}\n";
}
-
void ProcessStruct(const StructRec &Struct, raw_ostream &OS) {
if (Struct.getName() == "ol_error_struct_t") {
return;
diff --git a/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp b/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp
index 9e499d849c742..eec921ffba5ef 100644
--- a/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp
+++ b/offload/unittests/OffloadAPI/queue/olRetainQueue.cpp
@@ -15,6 +15,4 @@ using olRetainQueueTest = offloadQueueTest;
// TODO: When we can fetch queue info we can check the reference count is
// changing in the expected way. In the meantime just check the entry point
// doesn't blow up.
-TEST_F(olRetainQueueTest, Success) {
- ASSERT_SUCCESS(olRetainQueue(Queue));
-}
+TEST_F(olRetainQueueTest, Success) { ASSERT_SUCCESS(olRetainQueue(Queue)); }
>From 20acc170d6b26ff739dbc17c4d80531dba414ab4 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Wed, 5 Feb 2025 16:00:38 +0000
Subject: [PATCH 10/16] Fix leak in olReleaseQueue
---
offload/liboffload/src/OffloadImpl.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index a2d3730d3b303..7dcb5d935d535 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -364,7 +364,9 @@ ol_impl_result_t olRetainQueue_impl(ol_queue_handle_t Queue) {
}
ol_impl_result_t olReleaseQueue_impl(ol_queue_handle_t Queue) {
- Queue->RefCount--;
+ if (--Queue->RefCount == 0) {
+ delete Queue;
+ }
return OL_SUCCESS;
}
>From 3423f701beef7ea0823d99cd1468705a5f5f2c19 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Thu, 6 Feb 2025 16:49:19 +0000
Subject: [PATCH 11/16] General tidy up; improve documentation and formatting
---
offload/liboffload/API/Common.td | 7 +-
offload/liboffload/API/Enqueue.td | 5 +-
offload/liboffload/API/Event.td | 4 +-
offload/liboffload/API/Kernel.td | 22 ++-
offload/liboffload/API/Program.td | 10 +-
offload/liboffload/API/Queue.td | 8 +-
.../liboffload/include/generated/OffloadAPI.h | 63 +++++----
.../include/generated/OffloadEntryPoints.inc | 4 +
.../include/generated/OffloadPrint.hpp | 17 +--
offload/liboffload/src/OffloadImpl.cpp | 125 ++++++++----------
10 files changed, 142 insertions(+), 123 deletions(-)
diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td
index 7fedb2002f157..a0a2697e27e77 100644
--- a/offload/liboffload/API/Common.td
+++ b/offload/liboffload/API/Common.td
@@ -89,12 +89,11 @@ def : Enum {
Etor<"SUCCESS", "Success">,
Etor<"INVALID_VALUE", "Invalid Value">,
Etor<"INVALID_PLATFORM", "Invalid platform">,
- Etor<"DEVICE_NOT_FOUND", "Device not found">,
Etor<"INVALID_DEVICE", "Invalid device">,
- Etor<"DEVICE_LOST", "Device hung, reset, was removed, or driver update occurred">,
- Etor<"UNINITIALIZED", "plugin is not initialized or specific entry-point is not implemented">,
+ Etor<"INVALID_QUEUE", "Invalid queue">,
+ Etor<"INVALID_EVENT", "Invalid event">,
+ Etor<"INVALID_KERNEL_NAME", "Named kernel not found in the program binary">,
Etor<"OUT_OF_RESOURCES", "Out of resources">,
- Etor<"UNSUPPORTED_VERSION", "generic error code for unsupported versions">,
Etor<"UNSUPPORTED_FEATURE", "generic error code for unsupported features">,
Etor<"INVALID_ARGUMENT", "generic error code for invalid arguments">,
Etor<"INVALID_NULL_HANDLE", "handle argument is not valid">,
diff --git a/offload/liboffload/API/Enqueue.td b/offload/liboffload/API/Enqueue.td
index d9215e8175ef8..f503bf3c44139 100644
--- a/offload/liboffload/API/Enqueue.td
+++ b/offload/liboffload/API/Enqueue.td
@@ -21,7 +21,9 @@ def : Function {
Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
];
- let returns = [];
+ let returns = [
+ Return<"OL_ERRC_INVALID_SIZE", ["`Size == 0`"]>
+ ];
}
def : Function {
@@ -53,7 +55,6 @@ def : Function {
let returns = [];
}
-
def : Struct {
let name = "ol_kernel_launch_size_args_t";
let desc = "Size-related arguments for a kernel launch.";
diff --git a/offload/liboffload/API/Event.td b/offload/liboffload/API/Event.td
index db90a7c8e2be4..836a4755f3c87 100644
--- a/offload/liboffload/API/Event.td
+++ b/offload/liboffload/API/Event.td
@@ -12,7 +12,7 @@
def : Function {
let name = "olRetainEvent";
- let desc = "Increment the reference count of the given event";
+ let desc = "Increment the event's reference count";
let details = [];
let params = [
Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
@@ -22,7 +22,7 @@ def : Function {
def : Function {
let name = "olReleaseEvent";
- let desc = "Decrement the reference count of the given event";
+ let desc = "Decrement the event's reference count, and free it if the reference count reaches 0";
let details = [];
let params = [
Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index 4c8c84e9c71de..cad738c56b3a3 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -1,7 +1,21 @@
+//===-- Kernel.td - Kernel definitions for Offload ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to the kernel handle
+//
+//===----------------------------------------------------------------------===//
+
def : Function {
let name = "olCreateKernel";
- let desc = "";
- let details = [];
+ let desc = "Create a kernel from the function identified by `KernelName` in the given program";
+ let details = [
+ "The created kernel has an initial reference count of 1."
+ ];
let params = [
Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>,
Param<"const char*", "KernelName", "name of the kernel entry point in the program", PARAM_IN>,
@@ -12,7 +26,7 @@ def : Function {
def : Function {
let name = "olRetainKernel";
- let desc = "Increment the reference count of the given kernel";
+ let desc = "Increment the kernel's reference count";
let details = [];
let params = [
Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
@@ -22,7 +36,7 @@ def : Function {
def : Function {
let name = "olReleaseKernel";
- let desc = "Decrement the reference count of the given kernel";
+ let desc = "Decrement the kernel's reference count, and free it if the reference count reaches 0";
let details = [];
let params = [
Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
index 684a6581320f8..d928f3d0cc2f1 100644
--- a/offload/liboffload/API/Program.td
+++ b/offload/liboffload/API/Program.td
@@ -12,8 +12,10 @@
def : Function {
let name = "olCreateProgram";
- let desc = "";
- let details = [];
+ let desc = "Create a program for the device from the binary image pointed to by `ProgData`";
+ let details = [
+ "The created program has an initial reference count of 1."
+ ];
let params = [
Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
Param<"void*", "ProgData", "pointer to the program binary data", PARAM_IN>,
@@ -25,7 +27,7 @@ def : Function {
def : Function {
let name = "olRetainProgram";
- let desc = "Create a queue for the given device";
+ let desc = "Increment the program's reference count";
let details = [];
let params = [
Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>
@@ -35,7 +37,7 @@ def : Function {
def : Function {
let name = "olReleaseProgram";
- let desc = "Create a queue for the given device";
+ let desc = "Decrement the program's reference count, and free it if the reference count reaches 0";
let details = [];
let params = [
Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index 5629fa40d56d5..786840a8e2141 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -13,7 +13,9 @@
def : Function {
let name = "olCreateQueue";
let desc = "Create a queue for the given device";
- let details = [];
+ let details = [
+ "The created queue has an initial reference count of 1."
+ ];
let params = [
Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
Param<"ol_queue_handle_t*", "Queue", "output pointer for the created queue", PARAM_OUT>
@@ -23,7 +25,7 @@ def : Function {
def : Function {
let name = "olRetainQueue";
- let desc = "Create a queue for the given device";
+ let desc = "Increment the queue's reference count.";
let details = [];
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
@@ -33,7 +35,7 @@ def : Function {
def : Function {
let name = "olReleaseQueue";
- let desc = "Create a queue for the given device";
+ let desc = "Decrement the queue's reference count, and free it if the reference count reaches 0";
let details = [];
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 950c0e37ae67c..f8683af811ef7 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -110,34 +110,32 @@ typedef enum ol_errc_t {
OL_ERRC_INVALID_VALUE = 1,
/// Invalid platform
OL_ERRC_INVALID_PLATFORM = 2,
- /// Device not found
- OL_ERRC_DEVICE_NOT_FOUND = 3,
/// Invalid device
- OL_ERRC_INVALID_DEVICE = 4,
- /// Device hung, reset, was removed, or driver update occurred
- OL_ERRC_DEVICE_LOST = 5,
- /// plugin is not initialized or specific entry-point is not implemented
- OL_ERRC_UNINITIALIZED = 6,
+ OL_ERRC_INVALID_DEVICE = 3,
+ /// Invalid queue
+ OL_ERRC_INVALID_QUEUE = 4,
+ /// Invalid event
+ OL_ERRC_INVALID_EVENT = 5,
+ /// Named kernel not found in the program binary
+ OL_ERRC_INVALID_KERNEL_NAME = 6,
/// Out of resources
OL_ERRC_OUT_OF_RESOURCES = 7,
- /// generic error code for unsupported versions
- OL_ERRC_UNSUPPORTED_VERSION = 8,
/// generic error code for unsupported features
- OL_ERRC_UNSUPPORTED_FEATURE = 9,
+ OL_ERRC_UNSUPPORTED_FEATURE = 8,
/// generic error code for invalid arguments
- OL_ERRC_INVALID_ARGUMENT = 10,
+ OL_ERRC_INVALID_ARGUMENT = 9,
/// handle argument is not valid
- OL_ERRC_INVALID_NULL_HANDLE = 11,
+ OL_ERRC_INVALID_NULL_HANDLE = 10,
/// pointer argument may not be nullptr
- OL_ERRC_INVALID_NULL_POINTER = 12,
+ OL_ERRC_INVALID_NULL_POINTER = 11,
/// invalid size or dimensions (e.g., must not be zero, or is out of bounds)
- OL_ERRC_INVALID_SIZE = 13,
+ OL_ERRC_INVALID_SIZE = 12,
/// enumerator argument is not valid
- OL_ERRC_INVALID_ENUMERATION = 14,
+ OL_ERRC_INVALID_ENUMERATION = 13,
/// enumerator argument is not supported by the device
- OL_ERRC_UNSUPPORTED_ENUMERATION = 15,
+ OL_ERRC_UNSUPPORTED_ENUMERATION = 14,
/// Unknown or internal error
- OL_ERRC_UNKNOWN = 16,
+ OL_ERRC_UNKNOWN = 15,
/// @cond
OL_ERRC_FORCE_UINT32 = 0x7fffffff
/// @endcond
@@ -541,6 +539,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemFree(
/// @brief Create a queue for the given device
///
/// @details
+/// - The created queue has an initial reference count of 1.
///
/// @returns
/// - ::OL_RESULT_SUCCESS
@@ -557,7 +556,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateQueue(
ol_queue_handle_t *Queue);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Create a queue for the given device
+/// @brief Increment the queue's reference count.
///
/// @details
///
@@ -573,7 +572,8 @@ OL_APIEXPORT ol_result_t OL_APICALL olRetainQueue(
ol_queue_handle_t Queue);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Create a queue for the given device
+/// @brief Decrement the queue's reference count, and free it if the reference
+/// count reaches 0
///
/// @details
///
@@ -605,7 +605,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olFinishQueue(
ol_queue_handle_t Queue);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Increment the reference count of the given event
+/// @brief Increment the event's reference count
///
/// @details
///
@@ -621,7 +621,8 @@ OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(
ol_event_handle_t Event);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Decrement the reference count of the given event
+/// @brief Decrement the event's reference count, and free it if the reference
+/// count reaches 0
///
/// @details
///
@@ -661,6 +662,8 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(
/// - ::OL_RESULT_SUCCESS
/// - ::OL_ERRC_UNINITIALIZED
/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_SIZE
+/// + `Size == 0`
/// - ::OL_ERRC_INVALID_NULL_HANDLE
/// + `NULL == Queue`
/// - ::OL_ERRC_INVALID_NULL_POINTER
@@ -770,9 +773,11 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
ol_event_handle_t *EventOut);
///////////////////////////////////////////////////////////////////////////////
-/// @brief
+/// @brief Create a program for the device from the binary image pointed to by
+/// `ProgData`
///
/// @details
+/// - The created program has an initial reference count of 1.
///
/// @returns
/// - ::OL_RESULT_SUCCESS
@@ -794,7 +799,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateProgram(
ol_program_handle_t *Queue);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Create a queue for the given device
+/// @brief Increment the program's reference count
///
/// @details
///
@@ -810,7 +815,8 @@ OL_APIEXPORT ol_result_t OL_APICALL olRetainProgram(
ol_program_handle_t Program);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Create a queue for the given device
+/// @brief Decrement the program's reference count, and free it if the reference
+/// count reaches 0
///
/// @details
///
@@ -826,9 +832,11 @@ OL_APIEXPORT ol_result_t OL_APICALL olReleaseProgram(
ol_program_handle_t Program);
///////////////////////////////////////////////////////////////////////////////
-/// @brief
+/// @brief Create a kernel from the function identified by `KernelName` in the
+/// given program
///
/// @details
+/// - The created kernel has an initial reference count of 1.
///
/// @returns
/// - ::OL_RESULT_SUCCESS
@@ -848,7 +856,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(
ol_kernel_handle_t *Kernel);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Increment the reference count of the given kernel
+/// @brief Increment the kernel's reference count
///
/// @details
///
@@ -864,7 +872,8 @@ OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(
ol_kernel_handle_t Kernel);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Decrement the reference count of the given kernel
+/// @brief Decrement the kernel's reference count, and free it if the reference
+/// count reaches 0
///
/// @details
///
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 6d8f1d7c7171f..ffef36b5a9ac4 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -798,6 +798,10 @@ ol_impl_result_t olEnqueueDataWrite_val(ol_queue_handle_t Queue, void *SrcPtr,
void *DstPtr, size_t Size,
ol_event_handle_t *EventOut) {
if (true /*enableParameterValidation*/) {
+ if (Size == 0) {
+ return OL_ERRC_INVALID_SIZE;
+ }
+
if (NULL == Queue) {
return OL_ERRC_INVALID_NULL_HANDLE;
}
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 5271832451dd6..56ab655a4ae74 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -51,24 +51,21 @@ inline std::ostream &operator<<(std::ostream &os, enum ol_errc_t value) {
case OL_ERRC_INVALID_PLATFORM:
os << "OL_ERRC_INVALID_PLATFORM";
break;
- case OL_ERRC_DEVICE_NOT_FOUND:
- os << "OL_ERRC_DEVICE_NOT_FOUND";
- break;
case OL_ERRC_INVALID_DEVICE:
os << "OL_ERRC_INVALID_DEVICE";
break;
- case OL_ERRC_DEVICE_LOST:
- os << "OL_ERRC_DEVICE_LOST";
+ case OL_ERRC_INVALID_QUEUE:
+ os << "OL_ERRC_INVALID_QUEUE";
+ break;
+ case OL_ERRC_INVALID_EVENT:
+ os << "OL_ERRC_INVALID_EVENT";
break;
- case OL_ERRC_UNINITIALIZED:
- os << "OL_ERRC_UNINITIALIZED";
+ case OL_ERRC_INVALID_KERNEL_NAME:
+ os << "OL_ERRC_INVALID_KERNEL_NAME";
break;
case OL_ERRC_OUT_OF_RESOURCES:
os << "OL_ERRC_OUT_OF_RESOURCES";
break;
- case OL_ERRC_UNSUPPORTED_VERSION:
- os << "OL_ERRC_UNSUPPORTED_VERSION";
- break;
case OL_ERRC_UNSUPPORTED_FEATURE:
os << "OL_ERRC_UNSUPPORTED_FEATURE";
break;
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 7dcb5d935d535..8be4c76030b36 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -53,7 +53,9 @@ struct ol_program_handle_t_ {
std::atomic_uint32_t RefCount;
};
-struct OffloadArguments {
+// A helper that can be used to construct the argument buffer for a kernel.
+// Alternatively, a pre-existing buffer can be set with `setArgsData`.
+struct OffloadKernelArguments {
static constexpr size_t MaxParamBytes = 4096u;
using args_t = std::array<char, MaxParamBytes>;
using args_size_t = std::vector<size_t>;
@@ -94,7 +96,7 @@ struct ol_kernel_handle_t_ {
ol_program_handle_t Program;
std::atomic_uint32_t RefCount;
GenericKernelTy *KernelImpl;
- OffloadArguments Args;
+ OffloadKernelArguments Args;
};
using PlatformVecT = SmallVector<ol_platform_handle_t_, 4>;
@@ -238,9 +240,8 @@ ol_impl_result_t olGetDeviceCount_impl(ol_platform_handle_t Platform,
ol_impl_result_t olGetDevice_impl(ol_platform_handle_t Platform,
uint32_t NumEntries,
ol_device_handle_t *Devices) {
- if (NumEntries > Platform->Devices.size()) {
+ if (NumEntries > Platform->Devices.size())
return OL_ERRC_INVALID_SIZE;
- }
for (uint32_t DeviceIndex = 0; DeviceIndex < NumEntries; DeviceIndex++) {
Devices[DeviceIndex] = &(Platform->Devices[DeviceIndex]);
@@ -326,10 +327,9 @@ ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
void **AllocationOut) {
auto Alloc =
Device->Device.dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
- if (!Alloc) {
+ if (!Alloc)
return {OL_ERRC_OUT_OF_RESOURCES,
formatv("Could not create allocation on device {0}", Device).str()};
- }
*AllocationOut = *Alloc;
return OL_SUCCESS;
@@ -338,9 +338,9 @@ ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
void *Address) {
auto Res = Device->Device.dataDelete(Address, convertOlToPluginAllocTy(Type));
- if (Res) {
+ if (Res)
return {OL_ERRC_OUT_OF_RESOURCES, "Could not free allocation"};
- }
+
return OL_SUCCESS;
}
@@ -348,10 +348,9 @@ ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
ol_queue_handle_t *Queue) {
auto CreatedQueue = std::make_unique<ol_queue_handle_t_>();
auto Err = Device->Device.initAsyncInfo(&(CreatedQueue->AsyncInfo));
- if (Err) {
- return OL_ERRC_OUT_OF_RESOURCES;
- }
- // TODO: Check error
+ if (Err)
+ return {OL_ERRC_UNKNOWN, "Could not initialize stream resource"};
+
CreatedQueue->Device = Device;
CreatedQueue->RefCount = 1;
*Queue = CreatedQueue.release();
@@ -364,9 +363,9 @@ ol_impl_result_t olRetainQueue_impl(ol_queue_handle_t Queue) {
}
ol_impl_result_t olReleaseQueue_impl(ol_queue_handle_t Queue) {
- if (--Queue->RefCount == 0) {
+ if (--Queue->RefCount == 0)
delete Queue;
- }
+
return OL_SUCCESS;
}
@@ -375,27 +374,25 @@ ol_impl_result_t olFinishQueue_impl(ol_queue_handle_t Queue) {
// on it, but we have nothing to synchronize in that situation anyway.
if (Queue->AsyncInfo->Queue) {
auto Err = Queue->Device->Device.synchronize(Queue->AsyncInfo);
- if (Err) {
- return OL_ERRC_OUT_OF_RESOURCES;
- }
+ if (Err)
+ return {OL_ERRC_INVALID_QUEUE, "The queue failed to synchronize"};
}
// Recreate the stream resource so the queue can be reused
// TODO: Would be easier for the synchronization to (optionally) not release
// it to begin with.
auto Res = Queue->Device->Device.initAsyncInfo(&Queue->AsyncInfo);
- if (Res) {
- return OL_ERRC_OUT_OF_RESOURCES;
- }
+ if (Res)
+ return {OL_ERRC_UNKNOWN, "Could not reinitialize the stream resource"};
return OL_SUCCESS;
}
ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event) {
auto Res = Event->Device->Device.syncEvent(Event->EventInfo);
- if (Res) {
- return OL_ERRC_OUT_OF_RESOURCES;
- }
+ if (Res)
+ return {OL_ERRC_INVALID_EVENT, "The event failed to synchronize"};
+
return OL_SUCCESS;
}
@@ -405,7 +402,9 @@ ol_impl_result_t olRetainEvent_impl(ol_event_handle_t Event) {
}
ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event) {
- Event->RefCount--;
+ if (--Event->RefCount == 0)
+ delete Event;
+
return OL_SUCCESS;
}
@@ -413,14 +412,13 @@ ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
auto EventImpl = std::make_unique<ol_event_handle_t_>();
EventImpl->Queue = Queue;
auto Res = Queue->Device->Device.createEvent(&EventImpl->EventInfo);
- if (Res) {
+ if (Res)
return nullptr;
- }
+
Res =
Queue->Device->Device.recordEvent(EventImpl->EventInfo, Queue->AsyncInfo);
- if (Res) {
+ if (Res)
return nullptr;
- }
return EventImpl.release();
}
@@ -432,13 +430,11 @@ ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *SrcPtr,
auto Res = DeviceImpl.dataSubmit(DstPtr, SrcPtr, Size, Queue->AsyncInfo);
- if (Res) {
- return OL_ERRC_OUT_OF_RESOURCES;
- }
+ if (Res)
+ return {OL_ERRC_UNKNOWN, "The data submit operation failed"};
- if (EventOut) {
+ if (EventOut)
*EventOut = makeEvent(Queue);
- }
return OL_SUCCESS;
}
@@ -450,13 +446,11 @@ ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *SrcPtr,
auto Res = DeviceImpl.dataRetrieve(DstPtr, SrcPtr, Size, Queue->AsyncInfo);
- if (Res) {
- return OL_ERRC_OUT_OF_RESOURCES;
- }
+ if (Res)
+ return {OL_ERRC_UNKNOWN, "The data retrieve operation failed"};
- if (EventOut) {
+ if (EventOut)
*EventOut = makeEvent(Queue);
- }
return OL_SUCCESS;
}
@@ -471,13 +465,11 @@ ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue, void *SrcPtr,
auto Res = DeviceImpl.dataExchange(SrcPtr, DstDevice->Device, DstPtr, Size,
Queue->AsyncInfo);
- if (Res) {
- return OL_ERRC_OUT_OF_RESOURCES;
- }
+ if (Res)
+ return {OL_ERRC_UNKNOWN, "The data exchange operation failed"};
- if (EventOut) {
+ if (EventOut)
*EventOut = makeEvent(Queue);
- }
return OL_SUCCESS;
}
@@ -485,12 +477,14 @@ ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue, void *SrcPtr,
ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
size_t ProgDataSize,
ol_program_handle_t *Program) {
+ // Make a copy of the program binary in case it is released by the caller.
+ // TODO: Make this copy optional.
auto ImageData = MemoryBuffer::getMemBufferCopy(
StringRef(reinterpret_cast<char *>(ProgData), ProgDataSize));
- __tgt_device_image DeviceImage{(char *)ImageData->getBuffer().data(),
- ((char *)ImageData->getBuffer().data()) +
- ProgDataSize - 1,
- nullptr, nullptr};
+ __tgt_device_image DeviceImage{
+ const_cast<char *>(ImageData->getBuffer().data()),
+ const_cast<char *>(ImageData->getBuffer().data()) + ProgDataSize - 1,
+ nullptr, nullptr};
ol_program_handle_t Prog = new ol_program_handle_t_();
@@ -507,14 +501,14 @@ ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
}
ol_impl_result_t olRetainProgram_impl(ol_program_handle_t Program) {
- ++Program->RefCount;
+ Program->RefCount++;
return OL_SUCCESS;
}
ol_impl_result_t olReleaseProgram_impl(ol_program_handle_t Program) {
- if (--Program->RefCount == 0) {
+ if (--Program->RefCount == 0)
delete Program;
- }
+
return OL_SUCCESS;
}
@@ -524,14 +518,12 @@ ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
auto &Device = Program->Image->getDevice();
auto KernelImpl = Device.constructKernel(KernelName);
- if (!KernelImpl) {
- return OL_ERRC_OUT_OF_RESOURCES;
- }
+ if (!KernelImpl)
+ return OL_ERRC_INVALID_KERNEL_NAME;
auto Err = KernelImpl->init(Device, *Program->Image);
- if (Err) {
- return OL_ERRC_OUT_OF_RESOURCES;
- }
+ if (Err)
+ return {OL_ERRC_UNKNOWN, "Could not initialize the kernel"};
ol_kernel_handle_t CreatedKernel = new ol_kernel_handle_t_();
CreatedKernel->Program = Program;
@@ -548,9 +540,9 @@ ol_impl_result_t olRetainKernel_impl(ol_kernel_handle_t Kernel) {
}
ol_impl_result_t olReleaseKernel_impl(ol_kernel_handle_t Kernel) {
- if (--Kernel->RefCount == 0) {
+ if (--Kernel->RefCount == 0)
delete Kernel;
- }
+
return OL_SUCCESS;
}
@@ -579,23 +571,22 @@ olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
LaunchArgs.ThreadLimit[1] = LaunchSizeArgs->GroupSizeY;
LaunchArgs.ThreadLimit[2] = LaunchSizeArgs->GroupSizeZ;
- LaunchArgs.ArgPtrs = (void **)Kernel->Args.getStorage();
+ LaunchArgs.ArgPtrs =
+ reinterpret_cast<void **>(const_cast<char *>(Kernel->Args.getStorage()));
// No offsets needed, arguments are real pointers
auto ArgOffsets = std::vector<ptrdiff_t>(LaunchArgs.NumArgs, 0ul);
- auto Err = Kernel->KernelImpl->launch(
- DeviceImpl, (void **)Kernel->Args.getStorage(), ArgOffsets.data(),
- LaunchArgs, AsyncInfoWrapper);
+ auto Err = Kernel->KernelImpl->launch(DeviceImpl, LaunchArgs.ArgPtrs,
+ ArgOffsets.data(), LaunchArgs,
+ AsyncInfoWrapper);
AsyncInfoWrapper.finalize(Err);
- if (Err) {
- return OL_ERRC_OUT_OF_RESOURCES;
- }
+ if (Err)
+ return {OL_ERRC_UNKNOWN, "Could not finalize the AsyncInfoWrapper"};
- if (EventOut) {
+ if (EventOut)
*EventOut = makeEvent(Queue);
- }
return OL_SUCCESS;
}
>From 44122e140ea2b7f374e2315cb8e5fc972dec808b Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Fri, 7 Feb 2025 13:56:08 +0000
Subject: [PATCH 12/16] Revert plugin changes
The offload unit tests will no longer work on the host plugin.
Kernel execution will no longer work on the CUDA plugin.
---
.../common/include/GlobalHandler.h | 5 ++--
offload/plugins-nextgen/cuda/src/rtl.cpp | 28 -------------------
offload/plugins-nextgen/host/src/rtl.cpp | 2 +-
3 files changed, 3 insertions(+), 32 deletions(-)
diff --git a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h
index d65fceb8508d2..d2914e7cd0eb4 100644
--- a/offload/plugins-nextgen/common/include/GlobalHandler.h
+++ b/offload/plugins-nextgen/common/include/GlobalHandler.h
@@ -131,9 +131,8 @@ class GenericGlobalHandlerTy {
/// Get the address and size of a global in the image. Address and size are
/// returned in \p ImageGlobal, the global name is passed in \p ImageGlobal.
- virtual Error getGlobalMetadataFromImage(GenericDeviceTy &Device,
- DeviceImageTy &Image,
- GlobalTy &ImageGlobal);
+ Error getGlobalMetadataFromImage(GenericDeviceTy &Device,
+ DeviceImageTy &Image, GlobalTy &ImageGlobal);
/// Read the memory associated with a global from the image and store it on
/// the host. The name, size, and destination are defined by \p HostGlobal.
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index d7a69091ada74..894d1c2214b97 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1327,34 +1327,6 @@ class CUDAGlobalHandlerTy final : public GenericGlobalHandlerTy {
DeviceGlobal.setPtr(reinterpret_cast<void *>(CUPtr));
return Plugin::success();
}
-
- Error getGlobalMetadataFromImage(GenericDeviceTy &Device,
- DeviceImageTy &Image,
- GlobalTy &ImageGlobal) override {
- // If the image is an ELF we can use the generic path, otherwise fall back
- // and use cuModuleGetGlobal to query the image.
- if (utils::elf::isELF(Image.getMemoryBuffer().getBuffer())) {
- return GenericGlobalHandlerTy::getGlobalMetadataFromImage(Device, Image,
- ImageGlobal);
- }
-
- CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(Image);
-
- const char *GlobalName = ImageGlobal.getName().data();
-
- size_t CUSize;
- CUdeviceptr CUPtr;
- CUresult Res =
- cuModuleGetGlobal(&CUPtr, &CUSize, CUDAImage.getModule(), GlobalName);
- if (auto Err = Plugin::check(Res, "Error in cuModuleGetGlobal for '%s': %s",
- GlobalName))
- return Err;
-
- // Setup the global symbol's address and size.
- ImageGlobal.setPtr(reinterpret_cast<void *>(CUPtr));
- ImageGlobal.setSize(CUSize);
- return Plugin::success();
- }
};
/// Class implementing the CUDA-specific functionalities of the plugin.
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 1ba9a49f4f9af..1d4db95fff500 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -289,7 +289,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
/// This plugin does not support interoperability, do nothing
Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override {
- return Plugin::success();
+ return Plugin::error("initAsyncInfoImpl not supported");
}
/// This plugin does not support interoperability
>From 2aea02229fcc7833db9ed2fc1bb90b74aa5a1d25 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Fri, 7 Feb 2025 15:07:43 +0000
Subject: [PATCH 13/16] Rename `ol_*_handle_t_` -> `ol_*_impl_t`
---
.../liboffload/include/generated/OffloadAPI.h | 14 ++++----
offload/liboffload/src/OffloadImpl.cpp | 32 ++++++++++---------
offload/tools/offload-tblgen/APIGen.cpp | 9 +++++-
3 files changed, 32 insertions(+), 23 deletions(-)
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index f8683af811ef7..e463efd41cce1 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -75,31 +75,31 @@ extern "C" {
///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of a platform instance
-typedef struct ol_platform_handle_t_ *ol_platform_handle_t;
+typedef struct ol_platform_impl_t *ol_platform_handle_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of platform's device object
-typedef struct ol_device_handle_t_ *ol_device_handle_t;
+typedef struct ol_device_impl_t *ol_device_handle_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of context object
-typedef struct ol_context_handle_t_ *ol_context_handle_t;
+typedef struct ol_context_impl_t *ol_context_handle_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of queue object
-typedef struct ol_queue_handle_t_ *ol_queue_handle_t;
+typedef struct ol_queue_impl_t *ol_queue_handle_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of event object
-typedef struct ol_event_handle_t_ *ol_event_handle_t;
+typedef struct ol_event_impl_t *ol_event_handle_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of program object
-typedef struct ol_program_handle_t_ *ol_program_handle_t;
+typedef struct ol_program_impl_t *ol_program_handle_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of kernel object
-typedef struct ol_kernel_handle_t_ *ol_kernel_handle_t;
+typedef struct ol_kernel_impl_t *ol_kernel_handle_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Defines Return/Error codes
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 8be4c76030b36..89cc42823261f 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -22,32 +22,34 @@
using namespace llvm;
using namespace llvm::omp::target::plugin;
-// Handle type definitions. Ideally these would be 1:1 with the plugins
-struct ol_device_handle_t_ {
+// Handle type definitions. Ideally these would be 1:1 with the plugins, but
+// we add some additional data here for now to avoid churn in the plugin
+// interface.
+struct ol_device_impl_t {
int DeviceNum;
GenericDeviceTy &Device;
ol_platform_handle_t Platform;
};
-struct ol_platform_handle_t_ {
+struct ol_platform_impl_t {
std::unique_ptr<GenericPluginTy> Plugin;
- std::vector<ol_device_handle_t_> Devices;
+ std::vector<ol_device_impl_t> Devices;
};
-struct ol_queue_handle_t_ {
+struct ol_queue_impl_t {
__tgt_async_info *AsyncInfo;
ol_device_handle_t Device;
std::atomic_uint32_t RefCount;
};
-struct ol_event_handle_t_ {
+struct ol_event_impl_t {
void *EventInfo;
ol_queue_handle_t Queue;
ol_device_handle_t Device;
std::atomic_uint32_t RefCount;
};
-struct ol_program_handle_t_ {
+struct ol_program_impl_t {
llvm::omp::target::plugin::DeviceImageTy *Image;
std::unique_ptr<MemoryBuffer> ImageData;
std::atomic_uint32_t RefCount;
@@ -92,14 +94,14 @@ struct OffloadKernelArguments {
const char *getStorage() const noexcept { return Storage.data(); }
};
-struct ol_kernel_handle_t_ {
+struct ol_kernel_impl_t {
ol_program_handle_t Program;
std::atomic_uint32_t RefCount;
GenericKernelTy *KernelImpl;
OffloadKernelArguments Args;
};
-using PlatformVecT = SmallVector<ol_platform_handle_t_, 4>;
+using PlatformVecT = SmallVector<ol_platform_impl_t, 4>;
PlatformVecT &Platforms() {
static PlatformVecT Platforms;
return Platforms;
@@ -128,7 +130,7 @@ void initPlugins() {
// Attempt to create an instance of each supported plugin.
#define PLUGIN_TARGET(Name) \
do { \
- Platforms().emplace_back(ol_platform_handle_t_{ \
+ Platforms().emplace_back(ol_platform_impl_t{ \
std::unique_ptr<GenericPluginTy>(createPlugin_##Name()), {}}); \
} while (false);
#include "Shared/Targets.def"
@@ -141,7 +143,7 @@ void initPlugins() {
for (auto DevNum = 0; DevNum < Platform.Plugin->number_of_devices();
DevNum++) {
if (Platform.Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) {
- Platform.Devices.emplace_back(ol_device_handle_t_{
+ Platform.Devices.emplace_back(ol_device_impl_t{
DevNum, Platform.Plugin->getDevice(DevNum), &Platform});
}
}
@@ -346,7 +348,7 @@ ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
ol_queue_handle_t *Queue) {
- auto CreatedQueue = std::make_unique<ol_queue_handle_t_>();
+ auto CreatedQueue = std::make_unique<ol_queue_impl_t>();
auto Err = Device->Device.initAsyncInfo(&(CreatedQueue->AsyncInfo));
if (Err)
return {OL_ERRC_UNKNOWN, "Could not initialize stream resource"};
@@ -409,7 +411,7 @@ ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event) {
}
ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
- auto EventImpl = std::make_unique<ol_event_handle_t_>();
+ auto EventImpl = std::make_unique<ol_event_impl_t>();
EventImpl->Queue = Queue;
auto Res = Queue->Device->Device.createEvent(&EventImpl->EventInfo);
if (Res)
@@ -486,7 +488,7 @@ ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
const_cast<char *>(ImageData->getBuffer().data()) + ProgDataSize - 1,
nullptr, nullptr};
- ol_program_handle_t Prog = new ol_program_handle_t_();
+ ol_program_handle_t Prog = new ol_program_impl_t();
auto Res = Device->Device.loadBinary(Device->Device.Plugin, &DeviceImage);
if (!Res)
@@ -525,7 +527,7 @@ ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
if (Err)
return {OL_ERRC_UNKNOWN, "Could not initialize the kernel"};
- ol_kernel_handle_t CreatedKernel = new ol_kernel_handle_t_();
+ ol_kernel_handle_t CreatedKernel = new ol_kernel_impl_t();
CreatedKernel->Program = Program;
CreatedKernel->RefCount = 1;
CreatedKernel->KernelImpl = &*KernelImpl;
diff --git a/offload/tools/offload-tblgen/APIGen.cpp b/offload/tools/offload-tblgen/APIGen.cpp
index 97a2464f7a75c..8cc5bd5e452fe 100644
--- a/offload/tools/offload-tblgen/APIGen.cpp
+++ b/offload/tools/offload-tblgen/APIGen.cpp
@@ -41,9 +41,16 @@ static std::string MakeComment(StringRef in) {
}
static void ProcessHandle(const HandleRec &H, raw_ostream &OS) {
+ if (!H.getName().ends_with("_handle_t")) {
+ errs() << "Handle type name (" << H.getName()
+ << ") must end with '_handle_t'!\n";
+ exit(1);
+ }
+
+ auto ImplName = H.getName().substr(0, H.getName().size() - 9) + "_impl_t";
OS << CommentsHeader;
OS << formatv("/// @brief {0}\n", H.getDesc());
- OS << formatv("typedef struct {0}_ *{0};\n", H.getName());
+ OS << formatv("typedef struct {0} *{1};\n", ImplName, H.getName());
}
static void ProcessTypedef(const TypedefRec &T, raw_ostream &OS) {
>From 5c121fa88ec33671c71babcd580216f4609b667e Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Tue, 11 Feb 2025 16:58:55 +0000
Subject: [PATCH 14/16] Various fixes to address review feedback
---
offload/liboffload/API/Enqueue.td | 8 +-
offload/liboffload/API/Program.td | 2 +-
offload/liboffload/API/README.md | 6 +-
.../liboffload/include/generated/OffloadAPI.h | 46 +--
.../include/generated/OffloadEntryPoints.inc | 335 +++++++++---------
.../generated/OffloadImplFuncDecls.inc | 15 +-
.../include/generated/OffloadPrint.hpp | 24 +-
offload/liboffload/src/OffloadImpl.cpp | 13 +-
.../tools/offload-tblgen/EntryPointGen.cpp | 10 +-
.../OffloadAPI/enqueue/olEnqueueDataCopy.cpp | 6 +-
.../OffloadAPI/enqueue/olEnqueueDataRead.cpp | 4 +-
.../OffloadAPI/enqueue/olEnqueueDataWrite.cpp | 2 +-
12 files changed, 234 insertions(+), 237 deletions(-)
diff --git a/offload/liboffload/API/Enqueue.td b/offload/liboffload/API/Enqueue.td
index f503bf3c44139..695b157ac1de3 100644
--- a/offload/liboffload/API/Enqueue.td
+++ b/offload/liboffload/API/Enqueue.td
@@ -16,8 +16,8 @@ def : Function {
let details = [];
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
- Param<"void*", "SrcPtr", "host pointer to copy from", PARAM_IN>,
Param<"void*", "DstPtr", "device pointer to copy to", PARAM_IN>,
+ Param<"void*", "SrcPtr", "host pointer to copy from", PARAM_IN>,
Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
];
@@ -32,8 +32,8 @@ def : Function {
let details = [];
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
- Param<"void*", "SrcPtr", "device pointer to copy from", PARAM_IN>,
Param<"void*", "DstPtr", "host pointer to copy to", PARAM_IN>,
+ Param<"void*", "SrcPtr", "device pointer to copy from", PARAM_IN>,
Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
];
@@ -46,9 +46,9 @@ def : Function {
let details = [];
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
- Param<"void*", "SrcPtr", "device pointer to copy from", PARAM_IN>,
- Param<"void*", "DstPtr", "device pointer to copy to", PARAM_IN>,
Param<"ol_device_handle_t", "DstDevice", "device that the destination pointer is resident on", PARAM_IN>,
+ Param<"void*", "DstPtr", "device pointer to copy to", PARAM_IN>,
+ Param<"void*", "SrcPtr", "device pointer to copy from", PARAM_IN>,
Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
];
diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
index d928f3d0cc2f1..b2ea21f0877e6 100644
--- a/offload/liboffload/API/Program.td
+++ b/offload/liboffload/API/Program.td
@@ -20,7 +20,7 @@ def : Function {
Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
Param<"void*", "ProgData", "pointer to the program binary data", PARAM_IN>,
Param<"size_t", "ProgDataSize", "size of the program binary in bytes", PARAM_IN>,
- Param<"ol_program_handle_t*", "Queue", "output pointer for the created program", PARAM_OUT>
+ Param<"ol_program_handle_t*", "Program", "output pointer for the created program", PARAM_OUT>
];
let returns = [];
}
diff --git a/offload/liboffload/API/README.md b/offload/liboffload/API/README.md
index 38a055811b2d0..a205dc007b2ae 100644
--- a/offload/liboffload/API/README.md
+++ b/offload/liboffload/API/README.md
@@ -138,8 +138,8 @@ allow more backends to be easily added in future.
A new object can be added to the API by adding to one of the existing `.td`
files. It is also possible to add a new tablegen file to the API by adding it
-to the includes in `OffloadAPI.td`. When the offload target is rebuilt, the
-new definition will be included in the generated files.
+to the includes in `OffloadAPI.td`. When the `OffloadGenerate` target is
+rebuilt, the new definition will be included in the generated files.
### Adding a new entry point
@@ -147,4 +147,4 @@ When a new entry point is added (e.g. `offloadDeviceFoo`), the actual entry
point is automatically generated, which contains validation and tracing code.
It expects an implementation function (`offloadDeviceFoo_impl`) to be defined,
which it will call into. The definition of this implementation function should
-be added to `src/offload_impl.cpp`
+be added to `src/OffloadImpl.cpp`.
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index e463efd41cce1..110d252fe45a7 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -667,15 +667,15 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(
/// - ::OL_ERRC_INVALID_NULL_HANDLE
/// + `NULL == Queue`
/// - ::OL_ERRC_INVALID_NULL_POINTER
-/// + `NULL == SrcPtr`
/// + `NULL == DstPtr`
+/// + `NULL == SrcPtr`
OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataWrite(
// [in] handle of the queue
ol_queue_handle_t Queue,
- // [in] host pointer to copy from
- void *SrcPtr,
// [in] device pointer to copy to
void *DstPtr,
+ // [in] host pointer to copy from
+ void *SrcPtr,
// [in] size in bytes of data to copy
size_t Size,
// [out][optional] optional recorded event for the enqueued operation
@@ -693,15 +693,15 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataWrite(
/// - ::OL_ERRC_INVALID_NULL_HANDLE
/// + `NULL == Queue`
/// - ::OL_ERRC_INVALID_NULL_POINTER
-/// + `NULL == SrcPtr`
/// + `NULL == DstPtr`
+/// + `NULL == SrcPtr`
OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataRead(
// [in] handle of the queue
ol_queue_handle_t Queue,
- // [in] device pointer to copy from
- void *SrcPtr,
// [in] host pointer to copy to
void *DstPtr,
+ // [in] device pointer to copy from
+ void *SrcPtr,
// [in] size in bytes of data to copy
size_t Size,
// [out][optional] optional recorded event for the enqueued operation
@@ -720,17 +720,17 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataRead(
/// + `NULL == Queue`
/// + `NULL == DstDevice`
/// - ::OL_ERRC_INVALID_NULL_POINTER
-/// + `NULL == SrcPtr`
/// + `NULL == DstPtr`
+/// + `NULL == SrcPtr`
OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopy(
// [in] handle of the queue
ol_queue_handle_t Queue,
- // [in] device pointer to copy from
- void *SrcPtr,
- // [in] device pointer to copy to
- void *DstPtr,
// [in] device that the destination pointer is resident on
ol_device_handle_t DstDevice,
+ // [in] device pointer to copy to
+ void *DstPtr,
+ // [in] device pointer to copy from
+ void *SrcPtr,
// [in] size in bytes of data to copy
size_t Size,
// [out][optional] optional recorded event for the enqueued operation
@@ -787,7 +787,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
/// + `NULL == Device`
/// - ::OL_ERRC_INVALID_NULL_POINTER
/// + `NULL == ProgData`
-/// + `NULL == Queue`
+/// + `NULL == Program`
OL_APIEXPORT ol_result_t OL_APICALL olCreateProgram(
// [in] handle of the device
ol_device_handle_t Device,
@@ -796,7 +796,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateProgram(
// [in] size of the program binary in bytes
size_t ProgDataSize,
// [out] output pointer for the created program
- ol_program_handle_t *Queue);
+ ol_program_handle_t *Program);
///////////////////////////////////////////////////////////////////////////////
/// @brief Increment the program's reference count
@@ -1082,8 +1082,8 @@ typedef struct ol_wait_event_params_t {
/// @details Each entry is a pointer to the parameter passed to the function;
typedef struct ol_enqueue_data_write_params_t {
ol_queue_handle_t *pQueue;
- void **pSrcPtr;
void **pDstPtr;
+ void **pSrcPtr;
size_t *pSize;
ol_event_handle_t **pEventOut;
} ol_enqueue_data_write_params_t;
@@ -1093,8 +1093,8 @@ typedef struct ol_enqueue_data_write_params_t {
/// @details Each entry is a pointer to the parameter passed to the function;
typedef struct ol_enqueue_data_read_params_t {
ol_queue_handle_t *pQueue;
- void **pSrcPtr;
void **pDstPtr;
+ void **pSrcPtr;
size_t *pSize;
ol_event_handle_t **pEventOut;
} ol_enqueue_data_read_params_t;
@@ -1104,9 +1104,9 @@ typedef struct ol_enqueue_data_read_params_t {
/// @details Each entry is a pointer to the parameter passed to the function;
typedef struct ol_enqueue_data_copy_params_t {
ol_queue_handle_t *pQueue;
- void **pSrcPtr;
- void **pDstPtr;
ol_device_handle_t *pDstDevice;
+ void **pDstPtr;
+ void **pSrcPtr;
size_t *pSize;
ol_event_handle_t **pEventOut;
} ol_enqueue_data_copy_params_t;
@@ -1128,7 +1128,7 @@ typedef struct ol_create_program_params_t {
ol_device_handle_t *pDevice;
void **pProgData;
size_t *pProgDataSize;
- ol_program_handle_t **pQueue;
+ ol_program_handle_t **pProgram;
} ol_create_program_params_t;
///////////////////////////////////////////////////////////////////////////////
@@ -1331,7 +1331,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitEventWithCodeLoc(
/// information
/// @details See also ::olEnqueueDataWrite
OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataWriteWithCodeLoc(
- ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr, size_t Size,
+ ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
///////////////////////////////////////////////////////////////////////////////
@@ -1339,7 +1339,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataWriteWithCodeLoc(
/// information
/// @details See also ::olEnqueueDataRead
OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataReadWithCodeLoc(
- ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr, size_t Size,
+ ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
///////////////////////////////////////////////////////////////////////////////
@@ -1347,8 +1347,8 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataReadWithCodeLoc(
/// information
/// @details See also ::olEnqueueDataCopy
OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopyWithCodeLoc(
- ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
- ol_device_handle_t DstDevice, size_t Size, ol_event_handle_t *EventOut,
+ ol_queue_handle_t Queue, ol_device_handle_t DstDevice, void *DstPtr,
+ void *SrcPtr, size_t Size, ol_event_handle_t *EventOut,
ol_code_location_t *CodeLocation);
///////////////////////////////////////////////////////////////////////////////
@@ -1366,7 +1366,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunchWithCodeLoc(
/// @details See also ::olCreateProgram
OL_APIEXPORT ol_result_t OL_APICALL olCreateProgramWithCodeLoc(
ol_device_handle_t Device, void *ProgData, size_t ProgDataSize,
- ol_program_handle_t *Queue, ol_code_location_t *CodeLocation);
+ ol_program_handle_t *Program, ol_code_location_t *CodeLocation);
///////////////////////////////////////////////////////////////////////////////
/// @brief Variant of olRetainProgram that also sets source code location
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index ffef36b5a9ac4..bd9641f74d1bb 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -15,16 +15,16 @@ ol_impl_result_t olInit_val() {
}
OL_APIEXPORT ol_result_t OL_APICALL olInit() {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olInit";
+ std::cerr << "---> olInit";
}
ol_result_t Result = olInit_val();
if (offloadConfig().TracingEnabled) {
- std::cout << "()";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "()";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -46,16 +46,16 @@ ol_impl_result_t olShutDown_val() {
}
OL_APIEXPORT ol_result_t OL_APICALL olShutDown() {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olShutDown";
+ std::cerr << "---> olShutDown";
}
ol_result_t Result = olShutDown_val();
if (offloadConfig().TracingEnabled) {
- std::cout << "()";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "()";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -86,17 +86,17 @@ ol_impl_result_t olGetPlatform_val(uint32_t NumEntries,
OL_APIEXPORT ol_result_t OL_APICALL
olGetPlatform(uint32_t NumEntries, ol_platform_handle_t *Platforms) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olGetPlatform";
+ std::cerr << "---> olGetPlatform";
}
ol_result_t Result = olGetPlatform_val(NumEntries, Platforms);
if (offloadConfig().TracingEnabled) {
ol_get_platform_params_t Params = {&NumEntries, &Platforms};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -123,17 +123,17 @@ ol_impl_result_t olGetPlatformCount_val(uint32_t *NumPlatforms) {
}
OL_APIEXPORT ol_result_t OL_APICALL olGetPlatformCount(uint32_t *NumPlatforms) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olGetPlatformCount";
+ std::cerr << "---> olGetPlatformCount";
}
ol_result_t Result = olGetPlatformCount_val(NumPlatforms);
if (offloadConfig().TracingEnabled) {
ol_get_platform_count_params_t Params = {&NumPlatforms};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -171,7 +171,7 @@ OL_APIEXPORT ol_result_t OL_APICALL
olGetPlatformInfo(ol_platform_handle_t Platform, ol_platform_info_t PropName,
size_t PropSize, void *PropValue) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olGetPlatformInfo";
+ std::cerr << "---> olGetPlatformInfo";
}
ol_result_t Result =
@@ -180,10 +180,10 @@ olGetPlatformInfo(ol_platform_handle_t Platform, ol_platform_info_t PropName,
if (offloadConfig().TracingEnabled) {
ol_get_platform_info_params_t Params = {&Platform, &PropName, &PropSize,
&PropValue};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -220,7 +220,7 @@ OL_APIEXPORT ol_result_t OL_APICALL
olGetPlatformInfoSize(ol_platform_handle_t Platform,
ol_platform_info_t PropName, size_t *PropSizeRet) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olGetPlatformInfoSize";
+ std::cerr << "---> olGetPlatformInfoSize";
}
ol_result_t Result =
@@ -229,10 +229,10 @@ olGetPlatformInfoSize(ol_platform_handle_t Platform,
if (offloadConfig().TracingEnabled) {
ol_get_platform_info_size_params_t Params = {&Platform, &PropName,
&PropSizeRet};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -266,17 +266,17 @@ ol_impl_result_t olGetDeviceCount_val(ol_platform_handle_t Platform,
OL_APIEXPORT ol_result_t OL_APICALL
olGetDeviceCount(ol_platform_handle_t Platform, uint32_t *NumDevices) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olGetDeviceCount";
+ std::cerr << "---> olGetDeviceCount";
}
ol_result_t Result = olGetDeviceCount_val(Platform, NumDevices);
if (offloadConfig().TracingEnabled) {
ol_get_device_count_params_t Params = {&Platform, &NumDevices};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -315,17 +315,17 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDevice(ol_platform_handle_t Platform,
uint32_t NumEntries,
ol_device_handle_t *Devices) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olGetDevice";
+ std::cerr << "---> olGetDevice";
}
ol_result_t Result = olGetDevice_val(Platform, NumEntries, Devices);
if (offloadConfig().TracingEnabled) {
ol_get_device_params_t Params = {&Platform, &NumEntries, &Devices};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -366,7 +366,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfo(ol_device_handle_t Device,
size_t PropSize,
void *PropValue) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olGetDeviceInfo";
+ std::cerr << "---> olGetDeviceInfo";
}
ol_result_t Result =
@@ -375,10 +375,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfo(ol_device_handle_t Device,
if (offloadConfig().TracingEnabled) {
ol_get_device_info_params_t Params = {&Device, &PropName, &PropSize,
&PropValue};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -413,7 +413,7 @@ ol_impl_result_t olGetDeviceInfoSize_val(ol_device_handle_t Device,
OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize(
ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olGetDeviceInfoSize";
+ std::cerr << "---> olGetDeviceInfoSize";
}
ol_result_t Result = olGetDeviceInfoSize_val(Device, PropName, PropSizeRet);
@@ -421,10 +421,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize(
if (offloadConfig().TracingEnabled) {
ol_get_device_info_size_params_t Params = {&Device, &PropName,
&PropSizeRet};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -464,17 +464,17 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemAlloc(ol_device_handle_t Device,
size_t Size,
void **AllocationOut) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olMemAlloc";
+ std::cerr << "---> olMemAlloc";
}
ol_result_t Result = olMemAlloc_val(Device, Type, Size, AllocationOut);
if (offloadConfig().TracingEnabled) {
ol_mem_alloc_params_t Params = {&Device, &Type, &Size, &AllocationOut};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -509,17 +509,17 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemFree(ol_device_handle_t Device,
ol_alloc_type_t Type,
void *Address) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olMemFree";
+ std::cerr << "---> olMemFree";
}
ol_result_t Result = olMemFree_val(Device, Type, Address);
if (offloadConfig().TracingEnabled) {
ol_mem_free_params_t Params = {&Device, &Type, &Address};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -552,17 +552,17 @@ ol_impl_result_t olCreateQueue_val(ol_device_handle_t Device,
OL_APIEXPORT ol_result_t OL_APICALL olCreateQueue(ol_device_handle_t Device,
ol_queue_handle_t *Queue) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olCreateQueue";
+ std::cerr << "---> olCreateQueue";
}
ol_result_t Result = olCreateQueue_val(Device, Queue);
if (offloadConfig().TracingEnabled) {
ol_create_queue_params_t Params = {&Device, &Queue};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -589,17 +589,17 @@ ol_impl_result_t olRetainQueue_val(ol_queue_handle_t Queue) {
}
OL_APIEXPORT ol_result_t OL_APICALL olRetainQueue(ol_queue_handle_t Queue) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olRetainQueue";
+ std::cerr << "---> olRetainQueue";
}
ol_result_t Result = olRetainQueue_val(Queue);
if (offloadConfig().TracingEnabled) {
ol_retain_queue_params_t Params = {&Queue};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -625,17 +625,17 @@ ol_impl_result_t olReleaseQueue_val(ol_queue_handle_t Queue) {
}
OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueue(ol_queue_handle_t Queue) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olReleaseQueue";
+ std::cerr << "---> olReleaseQueue";
}
ol_result_t Result = olReleaseQueue_val(Queue);
if (offloadConfig().TracingEnabled) {
ol_release_queue_params_t Params = {&Queue};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -661,17 +661,17 @@ ol_impl_result_t olFinishQueue_val(ol_queue_handle_t Queue) {
}
OL_APIEXPORT ol_result_t OL_APICALL olFinishQueue(ol_queue_handle_t Queue) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olFinishQueue";
+ std::cerr << "---> olFinishQueue";
}
ol_result_t Result = olFinishQueue_val(Queue);
if (offloadConfig().TracingEnabled) {
ol_finish_queue_params_t Params = {&Queue};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -697,17 +697,17 @@ ol_impl_result_t olRetainEvent_val(ol_event_handle_t Event) {
}
OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(ol_event_handle_t Event) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olRetainEvent";
+ std::cerr << "---> olRetainEvent";
}
ol_result_t Result = olRetainEvent_val(Event);
if (offloadConfig().TracingEnabled) {
ol_retain_event_params_t Params = {&Event};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -733,17 +733,17 @@ ol_impl_result_t olReleaseEvent_val(ol_event_handle_t Event) {
}
OL_APIEXPORT ol_result_t OL_APICALL olReleaseEvent(ol_event_handle_t Event) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olReleaseEvent";
+ std::cerr << "---> olReleaseEvent";
}
ol_result_t Result = olReleaseEvent_val(Event);
if (offloadConfig().TracingEnabled) {
ol_release_event_params_t Params = {&Event};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -769,17 +769,17 @@ ol_impl_result_t olWaitEvent_val(ol_event_handle_t Event) {
}
OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(ol_event_handle_t Event) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olWaitEvent";
+ std::cerr << "---> olWaitEvent";
}
ol_result_t Result = olWaitEvent_val(Event);
if (offloadConfig().TracingEnabled) {
ol_wait_event_params_t Params = {&Event};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -794,8 +794,8 @@ ol_result_t olWaitEventWithCodeLoc(ol_event_handle_t Event,
}
///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olEnqueueDataWrite_val(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr, size_t Size,
+ol_impl_result_t olEnqueueDataWrite_val(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut) {
if (true /*enableParameterValidation*/) {
if (Size == 0) {
@@ -806,107 +806,106 @@ ol_impl_result_t olEnqueueDataWrite_val(ol_queue_handle_t Queue, void *SrcPtr,
return OL_ERRC_INVALID_NULL_HANDLE;
}
- if (NULL == SrcPtr) {
+ if (NULL == DstPtr) {
return OL_ERRC_INVALID_NULL_POINTER;
}
- if (NULL == DstPtr) {
+ if (NULL == SrcPtr) {
return OL_ERRC_INVALID_NULL_POINTER;
}
}
- return olEnqueueDataWrite_impl(Queue, SrcPtr, DstPtr, Size, EventOut);
+ return olEnqueueDataWrite_impl(Queue, DstPtr, SrcPtr, Size, EventOut);
}
OL_APIEXPORT ol_result_t OL_APICALL
-olEnqueueDataWrite(ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
+olEnqueueDataWrite(ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr,
size_t Size, ol_event_handle_t *EventOut) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olEnqueueDataWrite";
+ std::cerr << "---> olEnqueueDataWrite";
}
ol_result_t Result =
- olEnqueueDataWrite_val(Queue, SrcPtr, DstPtr, Size, EventOut);
+ olEnqueueDataWrite_val(Queue, DstPtr, SrcPtr, Size, EventOut);
if (offloadConfig().TracingEnabled) {
- ol_enqueue_data_write_params_t Params = {&Queue, &SrcPtr, &DstPtr, &Size,
+ ol_enqueue_data_write_params_t Params = {&Queue, &DstPtr, &SrcPtr, &Size,
&EventOut};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
}
-ol_result_t olEnqueueDataWriteWithCodeLoc(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr, size_t Size,
+ol_result_t olEnqueueDataWriteWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut,
ol_code_location_t *CodeLocation) {
currentCodeLocation() = CodeLocation;
ol_result_t Result =
- olEnqueueDataWrite(Queue, SrcPtr, DstPtr, Size, EventOut);
+ olEnqueueDataWrite(Queue, DstPtr, SrcPtr, Size, EventOut);
currentCodeLocation() = nullptr;
return Result;
}
///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olEnqueueDataRead_val(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr, size_t Size,
+ol_impl_result_t olEnqueueDataRead_val(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut) {
if (true /*enableParameterValidation*/) {
if (NULL == Queue) {
return OL_ERRC_INVALID_NULL_HANDLE;
}
- if (NULL == SrcPtr) {
+ if (NULL == DstPtr) {
return OL_ERRC_INVALID_NULL_POINTER;
}
- if (NULL == DstPtr) {
+ if (NULL == SrcPtr) {
return OL_ERRC_INVALID_NULL_POINTER;
}
}
- return olEnqueueDataRead_impl(Queue, SrcPtr, DstPtr, Size, EventOut);
+ return olEnqueueDataRead_impl(Queue, DstPtr, SrcPtr, Size, EventOut);
}
OL_APIEXPORT ol_result_t OL_APICALL
-olEnqueueDataRead(ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
+olEnqueueDataRead(ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr,
size_t Size, ol_event_handle_t *EventOut) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olEnqueueDataRead";
+ std::cerr << "---> olEnqueueDataRead";
}
ol_result_t Result =
- olEnqueueDataRead_val(Queue, SrcPtr, DstPtr, Size, EventOut);
+ olEnqueueDataRead_val(Queue, DstPtr, SrcPtr, Size, EventOut);
if (offloadConfig().TracingEnabled) {
- ol_enqueue_data_read_params_t Params = {&Queue, &SrcPtr, &DstPtr, &Size,
+ ol_enqueue_data_read_params_t Params = {&Queue, &DstPtr, &SrcPtr, &Size,
&EventOut};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
}
-ol_result_t olEnqueueDataReadWithCodeLoc(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr, size_t Size,
+ol_result_t olEnqueueDataReadWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut,
ol_code_location_t *CodeLocation) {
currentCodeLocation() = CodeLocation;
- ol_result_t Result = olEnqueueDataRead(Queue, SrcPtr, DstPtr, Size, EventOut);
+ ol_result_t Result = olEnqueueDataRead(Queue, DstPtr, SrcPtr, Size, EventOut);
currentCodeLocation() = nullptr;
return Result;
}
///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olEnqueueDataCopy_val(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr,
+ol_impl_result_t olEnqueueDataCopy_val(ol_queue_handle_t Queue,
ol_device_handle_t DstDevice,
- size_t Size,
+ void *DstPtr, void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut) {
if (true /*enableParameterValidation*/) {
if (NULL == Queue) {
@@ -917,48 +916,48 @@ ol_impl_result_t olEnqueueDataCopy_val(ol_queue_handle_t Queue, void *SrcPtr,
return OL_ERRC_INVALID_NULL_HANDLE;
}
- if (NULL == SrcPtr) {
+ if (NULL == DstPtr) {
return OL_ERRC_INVALID_NULL_POINTER;
}
- if (NULL == DstPtr) {
+ if (NULL == SrcPtr) {
return OL_ERRC_INVALID_NULL_POINTER;
}
}
- return olEnqueueDataCopy_impl(Queue, SrcPtr, DstPtr, DstDevice, Size,
+ return olEnqueueDataCopy_impl(Queue, DstDevice, DstPtr, SrcPtr, Size,
EventOut);
}
OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopy(
- ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
- ol_device_handle_t DstDevice, size_t Size, ol_event_handle_t *EventOut) {
+ ol_queue_handle_t Queue, ol_device_handle_t DstDevice, void *DstPtr,
+ void *SrcPtr, size_t Size, ol_event_handle_t *EventOut) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olEnqueueDataCopy";
+ std::cerr << "---> olEnqueueDataCopy";
}
ol_result_t Result =
- olEnqueueDataCopy_val(Queue, SrcPtr, DstPtr, DstDevice, Size, EventOut);
+ olEnqueueDataCopy_val(Queue, DstDevice, DstPtr, SrcPtr, Size, EventOut);
if (offloadConfig().TracingEnabled) {
- ol_enqueue_data_copy_params_t Params = {&Queue, &SrcPtr, &DstPtr,
- &DstDevice, &Size, &EventOut};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ ol_enqueue_data_copy_params_t Params = {&Queue, &DstDevice, &DstPtr,
+ &SrcPtr, &Size, &EventOut};
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
}
-ol_result_t olEnqueueDataCopyWithCodeLoc(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr,
+ol_result_t olEnqueueDataCopyWithCodeLoc(ol_queue_handle_t Queue,
ol_device_handle_t DstDevice,
+ void *DstPtr, void *SrcPtr,
size_t Size,
ol_event_handle_t *EventOut,
ol_code_location_t *CodeLocation) {
currentCodeLocation() = CodeLocation;
ol_result_t Result =
- olEnqueueDataCopy(Queue, SrcPtr, DstPtr, DstDevice, Size, EventOut);
+ olEnqueueDataCopy(Queue, DstDevice, DstPtr, SrcPtr, Size, EventOut);
currentCodeLocation() = nullptr;
return Result;
@@ -990,7 +989,7 @@ olEnqueueKernelLaunch(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
const ol_kernel_launch_size_args_t *LaunchSizeArgs,
ol_event_handle_t *EventOut) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olEnqueueKernelLaunch";
+ std::cerr << "---> olEnqueueKernelLaunch";
}
ol_result_t Result =
@@ -999,10 +998,10 @@ olEnqueueKernelLaunch(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
if (offloadConfig().TracingEnabled) {
ol_enqueue_kernel_launch_params_t Params = {&Queue, &Kernel,
&LaunchSizeArgs, &EventOut};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -1022,7 +1021,7 @@ ol_result_t olEnqueueKernelLaunchWithCodeLoc(
///////////////////////////////////////////////////////////////////////////////
ol_impl_result_t olCreateProgram_val(ol_device_handle_t Device, void *ProgData,
size_t ProgDataSize,
- ol_program_handle_t *Queue) {
+ ol_program_handle_t *Program) {
if (true /*enableParameterValidation*/) {
if (NULL == Device) {
return OL_ERRC_INVALID_NULL_HANDLE;
@@ -1032,40 +1031,40 @@ ol_impl_result_t olCreateProgram_val(ol_device_handle_t Device, void *ProgData,
return OL_ERRC_INVALID_NULL_POINTER;
}
- if (NULL == Queue) {
+ if (NULL == Program) {
return OL_ERRC_INVALID_NULL_POINTER;
}
}
- return olCreateProgram_impl(Device, ProgData, ProgDataSize, Queue);
+ return olCreateProgram_impl(Device, ProgData, ProgDataSize, Program);
}
OL_APIEXPORT ol_result_t OL_APICALL
olCreateProgram(ol_device_handle_t Device, void *ProgData, size_t ProgDataSize,
- ol_program_handle_t *Queue) {
+ ol_program_handle_t *Program) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olCreateProgram";
+ std::cerr << "---> olCreateProgram";
}
ol_result_t Result =
- olCreateProgram_val(Device, ProgData, ProgDataSize, Queue);
+ olCreateProgram_val(Device, ProgData, ProgDataSize, Program);
if (offloadConfig().TracingEnabled) {
ol_create_program_params_t Params = {&Device, &ProgData, &ProgDataSize,
- &Queue};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ &Program};
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
}
ol_result_t olCreateProgramWithCodeLoc(ol_device_handle_t Device,
void *ProgData, size_t ProgDataSize,
- ol_program_handle_t *Queue,
+ ol_program_handle_t *Program,
ol_code_location_t *CodeLocation) {
currentCodeLocation() = CodeLocation;
- ol_result_t Result = olCreateProgram(Device, ProgData, ProgDataSize, Queue);
+ ol_result_t Result = olCreateProgram(Device, ProgData, ProgDataSize, Program);
currentCodeLocation() = nullptr;
return Result;
@@ -1084,17 +1083,17 @@ ol_impl_result_t olRetainProgram_val(ol_program_handle_t Program) {
OL_APIEXPORT ol_result_t OL_APICALL
olRetainProgram(ol_program_handle_t Program) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olRetainProgram";
+ std::cerr << "---> olRetainProgram";
}
ol_result_t Result = olRetainProgram_val(Program);
if (offloadConfig().TracingEnabled) {
ol_retain_program_params_t Params = {&Program};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -1121,17 +1120,17 @@ ol_impl_result_t olReleaseProgram_val(ol_program_handle_t Program) {
OL_APIEXPORT ol_result_t OL_APICALL
olReleaseProgram(ol_program_handle_t Program) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olReleaseProgram";
+ std::cerr << "---> olReleaseProgram";
}
ol_result_t Result = olReleaseProgram_val(Program);
if (offloadConfig().TracingEnabled) {
ol_release_program_params_t Params = {&Program};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -1169,17 +1168,17 @@ OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(ol_program_handle_t Program,
const char *KernelName,
ol_kernel_handle_t *Kernel) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olCreateKernel";
+ std::cerr << "---> olCreateKernel";
}
ol_result_t Result = olCreateKernel_val(Program, KernelName, Kernel);
if (offloadConfig().TracingEnabled) {
ol_create_kernel_params_t Params = {&Program, &KernelName, &Kernel};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -1207,17 +1206,17 @@ ol_impl_result_t olRetainKernel_val(ol_kernel_handle_t Kernel) {
}
OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(ol_kernel_handle_t Kernel) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olRetainKernel";
+ std::cerr << "---> olRetainKernel";
}
ol_result_t Result = olRetainKernel_val(Kernel);
if (offloadConfig().TracingEnabled) {
ol_retain_kernel_params_t Params = {&Kernel};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -1243,17 +1242,17 @@ ol_impl_result_t olReleaseKernel_val(ol_kernel_handle_t Kernel) {
}
OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernel(ol_kernel_handle_t Kernel) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olReleaseKernel";
+ std::cerr << "---> olReleaseKernel";
}
ol_result_t Result = olReleaseKernel_val(Kernel);
if (offloadConfig().TracingEnabled) {
ol_release_kernel_params_t Params = {&Kernel};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -1286,7 +1285,7 @@ ol_impl_result_t olSetKernelArgValue_val(ol_kernel_handle_t Kernel,
OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgValue(
ol_kernel_handle_t Kernel, uint32_t Index, size_t Size, void *ArgData) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olSetKernelArgValue";
+ std::cerr << "---> olSetKernelArgValue";
}
ol_result_t Result = olSetKernelArgValue_val(Kernel, Index, Size, ArgData);
@@ -1294,10 +1293,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgValue(
if (offloadConfig().TracingEnabled) {
ol_set_kernel_arg_value_params_t Params = {&Kernel, &Index, &Size,
&ArgData};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
@@ -1331,7 +1330,7 @@ ol_impl_result_t olSetKernelArgsData_val(ol_kernel_handle_t Kernel,
OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgsData(
ol_kernel_handle_t Kernel, void *ArgsData, size_t ArgsDataSize) {
if (offloadConfig().TracingEnabled) {
- std::cout << "---> olSetKernelArgsData";
+ std::cerr << "---> olSetKernelArgsData";
}
ol_result_t Result = olSetKernelArgsData_val(Kernel, ArgsData, ArgsDataSize);
@@ -1339,10 +1338,10 @@ OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgsData(
if (offloadConfig().TracingEnabled) {
ol_set_kernel_args_data_params_t Params = {&Kernel, &ArgsData,
&ArgsDataSize};
- std::cout << "(" << &Params << ")";
- std::cout << "-> " << Result << "\n";
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
- std::cout << " *Error Details* " << Result->Details << " \n";
+ std::cerr << " *Error Details* " << Result->Details << " \n";
}
}
return Result;
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index e7179e44fc9ec..976422f3d7fd5 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -59,18 +59,17 @@ ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event);
ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event);
-ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr, size_t Size,
+ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut);
-ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr, size_t Size,
+ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut);
-ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr,
+ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue,
ol_device_handle_t DstDevice,
- size_t Size,
+ void *DstPtr, void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut);
ol_impl_result_t
@@ -80,7 +79,7 @@ olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
size_t ProgDataSize,
- ol_program_handle_t *Queue);
+ ol_program_handle_t *Program);
ol_impl_result_t olRetainProgram_impl(ol_program_handle_t Program);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 56ab655a4ae74..10dc58d1079a0 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -562,12 +562,12 @@ operator<<(std::ostream &os,
os << ".Queue = ";
printPtr(os, *params->pQueue);
os << ", ";
- os << ".SrcPtr = ";
- printPtr(os, *params->pSrcPtr);
- os << ", ";
os << ".DstPtr = ";
printPtr(os, *params->pDstPtr);
os << ", ";
+ os << ".SrcPtr = ";
+ printPtr(os, *params->pSrcPtr);
+ os << ", ";
os << ".Size = ";
os << *params->pSize;
os << ", ";
@@ -582,12 +582,12 @@ operator<<(std::ostream &os,
os << ".Queue = ";
printPtr(os, *params->pQueue);
os << ", ";
- os << ".SrcPtr = ";
- printPtr(os, *params->pSrcPtr);
- os << ", ";
os << ".DstPtr = ";
printPtr(os, *params->pDstPtr);
os << ", ";
+ os << ".SrcPtr = ";
+ printPtr(os, *params->pSrcPtr);
+ os << ", ";
os << ".Size = ";
os << *params->pSize;
os << ", ";
@@ -602,14 +602,14 @@ operator<<(std::ostream &os,
os << ".Queue = ";
printPtr(os, *params->pQueue);
os << ", ";
- os << ".SrcPtr = ";
- printPtr(os, *params->pSrcPtr);
+ os << ".DstDevice = ";
+ printPtr(os, *params->pDstDevice);
os << ", ";
os << ".DstPtr = ";
printPtr(os, *params->pDstPtr);
os << ", ";
- os << ".DstDevice = ";
- printPtr(os, *params->pDstDevice);
+ os << ".SrcPtr = ";
+ printPtr(os, *params->pSrcPtr);
os << ", ";
os << ".Size = ";
os << *params->pSize;
@@ -647,8 +647,8 @@ operator<<(std::ostream &os, const struct ol_create_program_params_t *params) {
os << ".ProgDataSize = ";
os << *params->pProgDataSize;
os << ", ";
- os << ".Queue = ";
- printPtr(os, *params->pQueue);
+ os << ".Program = ";
+ printPtr(os, *params->pProgram);
return os;
}
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 89cc42823261f..f6c3230558092 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -425,8 +425,8 @@ ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
return EventImpl.release();
}
-ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr, size_t Size,
+ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut) {
auto &DeviceImpl = Queue->Device->Device;
@@ -441,8 +441,8 @@ ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *SrcPtr,
return OL_SUCCESS;
}
-ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr, size_t Size,
+ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut) {
auto &DeviceImpl = Queue->Device->Device;
@@ -457,10 +457,9 @@ ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *SrcPtr,
return OL_SUCCESS;
}
-ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue, void *SrcPtr,
- void *DstPtr,
+ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue,
ol_device_handle_t DstDevice,
- size_t Size,
+ void *DstPtr, void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut) {
auto &DeviceImpl = Queue->Device->Device;
diff --git a/offload/tools/offload-tblgen/EntryPointGen.cpp b/offload/tools/offload-tblgen/EntryPointGen.cpp
index 990ff96a3121d..36fc5c3eb1c2a 100644
--- a/offload/tools/offload-tblgen/EntryPointGen.cpp
+++ b/offload/tools/offload-tblgen/EntryPointGen.cpp
@@ -72,7 +72,7 @@ static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) {
// Emit pre-call prints
OS << TAB_1 "if (offloadConfig().TracingEnabled) {\n";
- OS << formatv(TAB_2 "std::cout << \"---> {0}\";\n", F.getName());
+ OS << formatv(TAB_2 "std::cerr << \"---> {0}\";\n", F.getName());
OS << TAB_1 "}\n\n";
// Perform actual function call to the validation wrapper
@@ -91,13 +91,13 @@ static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) {
}
}
OS << formatv("};\n");
- OS << TAB_2 "std::cout << \"(\" << &Params << \")\";\n";
+ OS << TAB_2 "std::cerr << \"(\" << &Params << \")\";\n";
} else {
- OS << TAB_2 "std::cout << \"()\";\n";
+ OS << TAB_2 "std::cerr << \"()\";\n";
}
- OS << TAB_2 "std::cout << \"-> \" << Result << \"\\n\";\n";
+ OS << TAB_2 "std::cerr << \"-> \" << Result << \"\\n\";\n";
OS << TAB_2 "if (Result && Result->Details) {\n";
- OS << TAB_3 "std::cout << \" *Error Details* \" << Result->Details "
+ OS << TAB_3 "std::cerr << \" *Error Details* \" << Result->Details "
"<< \" \\n\";\n";
OS << TAB_2 "}\n";
OS << TAB_1 "}\n";
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
index d15e738bc94e6..dc2791266fa14 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
@@ -22,11 +22,11 @@ TEST_F(olEnqueueDataCopyTest, Success) {
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocA));
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocB));
ASSERT_SUCCESS(
- olEnqueueDataWrite(Queue, Input.data(), AllocA, Size, nullptr));
+ olEnqueueDataWrite(Queue, AllocA, Input.data(), Size, nullptr));
ASSERT_SUCCESS(
- olEnqueueDataCopy(Queue, AllocA, AllocB, Device, Size, nullptr));
+ olEnqueueDataCopy(Queue, Device, AllocB, AllocA, Size, nullptr));
ASSERT_SUCCESS(
- olEnqueueDataRead(Queue, AllocB, Output.data(), Size, nullptr));
+ olEnqueueDataRead(Queue, Output.data(), AllocB, Size, nullptr));
ASSERT_SUCCESS(olFinishQueue(Queue));
for (uint8_t Val : Output) {
ASSERT_EQ(Val, 42);
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
index 5787889c4febb..71323e4b44817 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
@@ -19,8 +19,8 @@ TEST_F(olEnqueueDataReadTest, Success) {
std::vector<uint8_t> Output(Size, 0);
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
- ASSERT_SUCCESS(olEnqueueDataWrite(Queue, Input.data(), Alloc, Size, nullptr));
- ASSERT_SUCCESS(olEnqueueDataRead(Queue, Alloc, Output.data(), Size, nullptr));
+ ASSERT_SUCCESS(olEnqueueDataWrite(Queue, Alloc, Input.data(), Size, nullptr));
+ ASSERT_SUCCESS(olEnqueueDataRead(Queue, Output.data(), Alloc, Size, nullptr));
ASSERT_SUCCESS(olFinishQueue(Queue));
for (uint8_t Val : Output) {
ASSERT_EQ(Val, 42);
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
index d3f3edf58a531..f60d501a9918d 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
@@ -17,7 +17,7 @@ TEST_F(olEnqueueDataWriteTest, Success) {
void *Alloc;
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
std::vector<uint8_t> Input(Size, 42);
- ASSERT_SUCCESS(olEnqueueDataWrite(Queue, Input.data(), Alloc, Size, nullptr));
+ ASSERT_SUCCESS(olEnqueueDataWrite(Queue, Alloc, Input.data(), Size, nullptr));
olFinishQueue(Queue);
olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc);
}
>From 3fbdf61727c9e803134b54863af8dca009bde5b8 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Tue, 11 Feb 2025 17:17:33 +0000
Subject: [PATCH 15/16] Formatting
---
offload/liboffload/src/OffloadImpl.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index f6c3230558092..d395eb34a5f4a 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -130,7 +130,7 @@ void initPlugins() {
// Attempt to create an instance of each supported plugin.
#define PLUGIN_TARGET(Name) \
do { \
- Platforms().emplace_back(ol_platform_impl_t{ \
+ Platforms().emplace_back(ol_platform_impl_t{ \
std::unique_ptr<GenericPluginTy>(createPlugin_##Name()), {}}); \
} while (false);
#include "Shared/Targets.def"
>From 0ca7527f782c53f7f33cc9fda401fabea0105a0a Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Thu, 13 Feb 2025 16:47:19 +0000
Subject: [PATCH 16/16] Alternative memcpy implementation
---
offload/liboffload/API/Device.td | 12 +
offload/liboffload/API/Enqueue.td | 27 ++-
.../liboffload/include/generated/OffloadAPI.h | 132 +++++++++--
.../include/generated/OffloadEntryPoints.inc | 207 ++++++++++++++----
.../include/generated/OffloadFuncs.inc | 16 +-
.../generated/OffloadImplFuncDecls.inc | 33 ++-
.../include/generated/OffloadPrint.hpp | 38 +++-
offload/liboffload/src/OffloadImpl.cpp | 114 +++++++---
offload/unittests/OffloadAPI/CMakeLists.txt | 1 +
.../OffloadAPI/enqueue/olEnqueueDataCopy.cpp | 12 +-
.../OffloadAPI/enqueue/olEnqueueDataRead.cpp | 12 +-
.../OffloadAPI/enqueue/olEnqueueDataWrite.cpp | 9 +-
.../OffloadAPI/enqueue/olEnqueueMemcpy.cpp | 71 ++++++
13 files changed, 542 insertions(+), 142 deletions(-)
create mode 100644 offload/unittests/OffloadAPI/enqueue/olEnqueueMemcpy.cpp
diff --git a/offload/liboffload/API/Device.td b/offload/liboffload/API/Device.td
index 30c0b71fe7b37..ee1ca38b48a07 100644
--- a/offload/liboffload/API/Device.td
+++ b/offload/liboffload/API/Device.td
@@ -104,3 +104,15 @@ def : Function {
Return<"OL_ERRC_INVALID_DEVICE">
];
}
+
+def : Function {
+ let name = "olGetHostDevice";
+ let desc = "Return the special host device used to represent the host in memory transfer operations";
+ let details = [
+ "The host device does not support queues"
+ ];
+ let params = [
+ Param<"ol_device_handle_t*", "Device", "Output pointer for the device">
+ ]; // TODO: Should this function also take a platform handle as an input?
+ let returns = [];
+}
diff --git a/offload/liboffload/API/Enqueue.td b/offload/liboffload/API/Enqueue.td
index 695b157ac1de3..723d87cfb2d29 100644
--- a/offload/liboffload/API/Enqueue.td
+++ b/offload/liboffload/API/Enqueue.td
@@ -11,7 +11,28 @@
//===----------------------------------------------------------------------===//
def : Function {
- let name = "olEnqueueDataWrite";
+ let name = "olEnqueueMemcpy";
+ let desc = "Enqueue a memcpy operation.";
+ let details = [
+ "For host pointers, use the device returned by olGetHostDevice",
+ "At least one device must be a non-host device"
+ ];
+ let params = [
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+ Param<"void*", "DstPtr", "pointer to copy to", PARAM_IN>,
+ Param<"ol_device_handle_t", "DstDevice", "device that DstPtr belongs to", PARAM_IN>,
+ Param<"void*", "SrcPtr", "pointer to copy from", PARAM_IN>,
+ Param<"ol_device_handle_t", "SrcDevice", "device that SrcPtr belongs to", PARAM_IN>,
+ Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
+ Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+ ];
+ let returns = [
+ Return<"OL_ERRC_INVALID_SIZE", ["`Size == 0`"]>
+ ];
+}
+
+def : Function {
+ let name = "olEnqueueMemcpyHtoD";
let desc = "Enqueue a write operation from host to device memory";
let details = [];
let params = [
@@ -27,7 +48,7 @@ def : Function {
}
def : Function {
- let name = "olEnqueueDataRead";
+ let name = "olEnqueueMemcpyDtoH";
let desc = "Enqueue a read operation from device to host memory";
let details = [];
let params = [
@@ -41,7 +62,7 @@ def : Function {
}
def : Function {
- let name = "olEnqueueDataCopy";
+ let name = "olEnqueueMemcpyDtoD";
let desc = "Enqueue a write operation between device allocations";
let details = [];
let params = [
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 110d252fe45a7..dd301f564a283 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -474,6 +474,24 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize(
// [out] pointer to the number of bytes required to store the query
size_t *PropSizeRet);
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Return the special host device used to represent the host in memory
+/// transfer operations
+///
+/// @details
+/// - The host device does not support queues
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == Device`
+OL_APIEXPORT ol_result_t OL_APICALL olGetHostDevice(
+ // Output pointer for the device
+ ol_device_handle_t *Device);
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Represents the type of allocation made with olMemAlloc
typedef enum ol_alloc_type_t {
@@ -653,6 +671,42 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(
// [in] handle of the event
ol_event_handle_t Event);
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a memcpy operation.
+///
+/// @details
+/// - For host pointers, use the device returned by olGetHostDevice
+/// - At least one device must be a non-host device
+///
+/// @returns
+/// - ::OL_RESULT_SUCCESS
+/// - ::OL_ERRC_UNINITIALIZED
+/// - ::OL_ERRC_DEVICE_LOST
+/// - ::OL_ERRC_INVALID_SIZE
+/// + `Size == 0`
+/// - ::OL_ERRC_INVALID_NULL_HANDLE
+/// + `NULL == Queue`
+/// + `NULL == DstDevice`
+/// + `NULL == SrcDevice`
+/// - ::OL_ERRC_INVALID_NULL_POINTER
+/// + `NULL == DstPtr`
+/// + `NULL == SrcPtr`
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpy(
+ // [in] handle of the queue
+ ol_queue_handle_t Queue,
+ // [in] pointer to copy to
+ void *DstPtr,
+ // [in] device that DstPtr belongs to
+ ol_device_handle_t DstDevice,
+ // [in] pointer to copy from
+ void *SrcPtr,
+ // [in] device that SrcPtr belongs to
+ ol_device_handle_t SrcDevice,
+ // [in] size in bytes of data to copy
+ size_t Size,
+ // [out][optional] optional recorded event for the enqueued operation
+ ol_event_handle_t *EventOut);
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Enqueue a write operation from host to device memory
///
@@ -669,7 +723,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(
/// - ::OL_ERRC_INVALID_NULL_POINTER
/// + `NULL == DstPtr`
/// + `NULL == SrcPtr`
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataWrite(
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpyHtoD(
// [in] handle of the queue
ol_queue_handle_t Queue,
// [in] device pointer to copy to
@@ -695,7 +749,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataWrite(
/// - ::OL_ERRC_INVALID_NULL_POINTER
/// + `NULL == DstPtr`
/// + `NULL == SrcPtr`
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataRead(
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpyDtoH(
// [in] handle of the queue
ol_queue_handle_t Queue,
// [in] host pointer to copy to
@@ -722,7 +776,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataRead(
/// - ::OL_ERRC_INVALID_NULL_POINTER
/// + `NULL == DstPtr`
/// + `NULL == SrcPtr`
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopy(
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpyDtoD(
// [in] handle of the queue
ol_queue_handle_t Queue,
// [in] device that the destination pointer is resident on
@@ -1008,6 +1062,13 @@ typedef struct ol_get_device_info_size_params_t {
size_t **pPropSizeRet;
} ol_get_device_info_size_params_t;
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olGetHostDevice
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_get_host_device_params_t {
+ ol_device_handle_t **pDevice;
+} ol_get_host_device_params_t;
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for olMemAlloc
/// @details Each entry is a pointer to the parameter passed to the function;
@@ -1078,38 +1139,51 @@ typedef struct ol_wait_event_params_t {
} ol_wait_event_params_t;
///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olEnqueueDataWrite
+/// @brief Function parameters for olEnqueueMemcpy
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_enqueue_memcpy_params_t {
+ ol_queue_handle_t *pQueue;
+ void **pDstPtr;
+ ol_device_handle_t *pDstDevice;
+ void **pSrcPtr;
+ ol_device_handle_t *pSrcDevice;
+ size_t *pSize;
+ ol_event_handle_t **pEventOut;
+} ol_enqueue_memcpy_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olEnqueueMemcpyHtoD
/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_enqueue_data_write_params_t {
+typedef struct ol_enqueue_memcpy_hto_d_params_t {
ol_queue_handle_t *pQueue;
void **pDstPtr;
void **pSrcPtr;
size_t *pSize;
ol_event_handle_t **pEventOut;
-} ol_enqueue_data_write_params_t;
+} ol_enqueue_memcpy_hto_d_params_t;
///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olEnqueueDataRead
+/// @brief Function parameters for olEnqueueMemcpyDtoH
/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_enqueue_data_read_params_t {
+typedef struct ol_enqueue_memcpy_dto_h_params_t {
ol_queue_handle_t *pQueue;
void **pDstPtr;
void **pSrcPtr;
size_t *pSize;
ol_event_handle_t **pEventOut;
-} ol_enqueue_data_read_params_t;
+} ol_enqueue_memcpy_dto_h_params_t;
///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for olEnqueueDataCopy
+/// @brief Function parameters for olEnqueueMemcpyDtoD
/// @details Each entry is a pointer to the parameter passed to the function;
-typedef struct ol_enqueue_data_copy_params_t {
+typedef struct ol_enqueue_memcpy_dto_d_params_t {
ol_queue_handle_t *pQueue;
ol_device_handle_t *pDstDevice;
void **pDstPtr;
void **pSrcPtr;
size_t *pSize;
ol_event_handle_t **pEventOut;
-} ol_enqueue_data_copy_params_t;
+} ol_enqueue_memcpy_dto_d_params_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for olEnqueueKernelLaunch
@@ -1262,6 +1336,13 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSizeWithCodeLoc(
ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet,
ol_code_location_t *CodeLocation);
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olGetHostDevice that also sets source code location
+/// information
+/// @details See also ::olGetHostDevice
+OL_APIEXPORT ol_result_t OL_APICALL olGetHostDeviceWithCodeLoc(
+ ol_device_handle_t *Device, ol_code_location_t *CodeLocation);
+
///////////////////////////////////////////////////////////////////////////////
/// @brief Variant of olMemAlloc that also sets source code location information
/// @details See also ::olMemAlloc
@@ -1327,26 +1408,35 @@ OL_APIEXPORT ol_result_t OL_APICALL olWaitEventWithCodeLoc(
ol_event_handle_t Event, ol_code_location_t *CodeLocation);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olEnqueueDataWrite that also sets source code location
+/// @brief Variant of olEnqueueMemcpy that also sets source code location
+/// information
+/// @details See also ::olEnqueueMemcpy
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpyWithCodeLoc(
+ ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice,
+ void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size,
+ ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olEnqueueMemcpyHtoD that also sets source code location
/// information
-/// @details See also ::olEnqueueDataWrite
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataWriteWithCodeLoc(
+/// @details See also ::olEnqueueMemcpyHtoD
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpyHtoDWithCodeLoc(
ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olEnqueueDataRead that also sets source code location
+/// @brief Variant of olEnqueueMemcpyDtoH that also sets source code location
/// information
-/// @details See also ::olEnqueueDataRead
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataReadWithCodeLoc(
+/// @details See also ::olEnqueueMemcpyDtoH
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpyDtoHWithCodeLoc(
ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr, size_t Size,
ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
///////////////////////////////////////////////////////////////////////////////
-/// @brief Variant of olEnqueueDataCopy that also sets source code location
+/// @brief Variant of olEnqueueMemcpyDtoD that also sets source code location
/// information
-/// @details See also ::olEnqueueDataCopy
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopyWithCodeLoc(
+/// @details See also ::olEnqueueMemcpyDtoD
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpyDtoDWithCodeLoc(
ol_queue_handle_t Queue, ol_device_handle_t DstDevice, void *DstPtr,
void *SrcPtr, size_t Size, ol_event_handle_t *EventOut,
ol_code_location_t *CodeLocation);
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index bd9641f74d1bb..c3104c2db735e 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -440,6 +440,43 @@ ol_result_t olGetDeviceInfoSizeWithCodeLoc(ol_device_handle_t Device,
return Result;
}
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olGetHostDevice_val(ol_device_handle_t *Device) {
+ if (true /*enableParameterValidation*/) {
+ if (NULL == Device) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olGetHostDevice_impl(Device);
+}
+OL_APIEXPORT ol_result_t OL_APICALL
+olGetHostDevice(ol_device_handle_t *Device) {
+ if (offloadConfig().TracingEnabled) {
+ std::cerr << "---> olGetHostDevice";
+ }
+
+ ol_result_t Result = olGetHostDevice_val(Device);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_get_host_device_params_t Params = {&Device};
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cerr << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olGetHostDeviceWithCodeLoc(ol_device_handle_t *Device,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olGetHostDevice(Device);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
///////////////////////////////////////////////////////////////////////////////
ol_impl_result_t olMemAlloc_val(ol_device_handle_t Device, ol_alloc_type_t Type,
size_t Size, void **AllocationOut) {
@@ -794,9 +831,79 @@ ol_result_t olWaitEventWithCodeLoc(ol_event_handle_t Event,
}
///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olEnqueueDataWrite_val(ol_queue_handle_t Queue, void *DstPtr,
- void *SrcPtr, size_t Size,
- ol_event_handle_t *EventOut) {
+ol_impl_result_t olEnqueueMemcpy_val(ol_queue_handle_t Queue, void *DstPtr,
+ ol_device_handle_t DstDevice, void *SrcPtr,
+ ol_device_handle_t SrcDevice, size_t Size,
+ ol_event_handle_t *EventOut) {
+ if (true /*enableParameterValidation*/) {
+ if (Size == 0) {
+ return OL_ERRC_INVALID_SIZE;
+ }
+
+ if (NULL == Queue) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == DstDevice) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == SrcDevice) {
+ return OL_ERRC_INVALID_NULL_HANDLE;
+ }
+
+ if (NULL == DstPtr) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+
+ if (NULL == SrcPtr) {
+ return OL_ERRC_INVALID_NULL_POINTER;
+ }
+ }
+
+ return olEnqueueMemcpy_impl(Queue, DstPtr, DstDevice, SrcPtr, SrcDevice, Size,
+ EventOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpy(
+ ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice,
+ void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size,
+ ol_event_handle_t *EventOut) {
+ if (offloadConfig().TracingEnabled) {
+ std::cerr << "---> olEnqueueMemcpy";
+ }
+
+ ol_result_t Result = olEnqueueMemcpy_val(Queue, DstPtr, DstDevice, SrcPtr,
+ SrcDevice, Size, EventOut);
+
+ if (offloadConfig().TracingEnabled) {
+ ol_enqueue_memcpy_params_t Params = {
+ &Queue, &DstPtr, &DstDevice, &SrcPtr, &SrcDevice, &Size, &EventOut};
+ std::cerr << "(" << &Params << ")";
+ std::cerr << "-> " << Result << "\n";
+ if (Result && Result->Details) {
+ std::cerr << " *Error Details* " << Result->Details << " \n";
+ }
+ }
+ return Result;
+}
+ol_result_t olEnqueueMemcpyWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
+ ol_device_handle_t DstDevice,
+ void *SrcPtr,
+ ol_device_handle_t SrcDevice,
+ size_t Size, ol_event_handle_t *EventOut,
+ ol_code_location_t *CodeLocation) {
+ currentCodeLocation() = CodeLocation;
+ ol_result_t Result = olEnqueueMemcpy(Queue, DstPtr, DstDevice, SrcPtr,
+ SrcDevice, Size, EventOut);
+
+ currentCodeLocation() = nullptr;
+ return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olEnqueueMemcpyHtoD_val(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
+ ol_event_handle_t *EventOut) {
if (true /*enableParameterValidation*/) {
if (Size == 0) {
return OL_ERRC_INVALID_SIZE;
@@ -815,21 +922,21 @@ ol_impl_result_t olEnqueueDataWrite_val(ol_queue_handle_t Queue, void *DstPtr,
}
}
- return olEnqueueDataWrite_impl(Queue, DstPtr, SrcPtr, Size, EventOut);
+ return olEnqueueMemcpyHtoD_impl(Queue, DstPtr, SrcPtr, Size, EventOut);
}
OL_APIEXPORT ol_result_t OL_APICALL
-olEnqueueDataWrite(ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr,
- size_t Size, ol_event_handle_t *EventOut) {
+olEnqueueMemcpyHtoD(ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr,
+ size_t Size, ol_event_handle_t *EventOut) {
if (offloadConfig().TracingEnabled) {
- std::cerr << "---> olEnqueueDataWrite";
+ std::cerr << "---> olEnqueueMemcpyHtoD";
}
ol_result_t Result =
- olEnqueueDataWrite_val(Queue, DstPtr, SrcPtr, Size, EventOut);
+ olEnqueueMemcpyHtoD_val(Queue, DstPtr, SrcPtr, Size, EventOut);
if (offloadConfig().TracingEnabled) {
- ol_enqueue_data_write_params_t Params = {&Queue, &DstPtr, &SrcPtr, &Size,
- &EventOut};
+ ol_enqueue_memcpy_hto_d_params_t Params = {&Queue, &DstPtr, &SrcPtr, &Size,
+ &EventOut};
std::cerr << "(" << &Params << ")";
std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
@@ -838,22 +945,23 @@ olEnqueueDataWrite(ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr,
}
return Result;
}
-ol_result_t olEnqueueDataWriteWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
- void *SrcPtr, size_t Size,
- ol_event_handle_t *EventOut,
- ol_code_location_t *CodeLocation) {
+ol_result_t olEnqueueMemcpyHtoDWithCodeLoc(ol_queue_handle_t Queue,
+ void *DstPtr, void *SrcPtr,
+ size_t Size,
+ ol_event_handle_t *EventOut,
+ ol_code_location_t *CodeLocation) {
currentCodeLocation() = CodeLocation;
ol_result_t Result =
- olEnqueueDataWrite(Queue, DstPtr, SrcPtr, Size, EventOut);
+ olEnqueueMemcpyHtoD(Queue, DstPtr, SrcPtr, Size, EventOut);
currentCodeLocation() = nullptr;
return Result;
}
///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olEnqueueDataRead_val(ol_queue_handle_t Queue, void *DstPtr,
- void *SrcPtr, size_t Size,
- ol_event_handle_t *EventOut) {
+ol_impl_result_t olEnqueueMemcpyDtoH_val(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
+ ol_event_handle_t *EventOut) {
if (true /*enableParameterValidation*/) {
if (NULL == Queue) {
return OL_ERRC_INVALID_NULL_HANDLE;
@@ -868,21 +976,21 @@ ol_impl_result_t olEnqueueDataRead_val(ol_queue_handle_t Queue, void *DstPtr,
}
}
- return olEnqueueDataRead_impl(Queue, DstPtr, SrcPtr, Size, EventOut);
+ return olEnqueueMemcpyDtoH_impl(Queue, DstPtr, SrcPtr, Size, EventOut);
}
OL_APIEXPORT ol_result_t OL_APICALL
-olEnqueueDataRead(ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr,
- size_t Size, ol_event_handle_t *EventOut) {
+olEnqueueMemcpyDtoH(ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr,
+ size_t Size, ol_event_handle_t *EventOut) {
if (offloadConfig().TracingEnabled) {
- std::cerr << "---> olEnqueueDataRead";
+ std::cerr << "---> olEnqueueMemcpyDtoH";
}
ol_result_t Result =
- olEnqueueDataRead_val(Queue, DstPtr, SrcPtr, Size, EventOut);
+ olEnqueueMemcpyDtoH_val(Queue, DstPtr, SrcPtr, Size, EventOut);
if (offloadConfig().TracingEnabled) {
- ol_enqueue_data_read_params_t Params = {&Queue, &DstPtr, &SrcPtr, &Size,
- &EventOut};
+ ol_enqueue_memcpy_dto_h_params_t Params = {&Queue, &DstPtr, &SrcPtr, &Size,
+ &EventOut};
std::cerr << "(" << &Params << ")";
std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
@@ -891,22 +999,25 @@ olEnqueueDataRead(ol_queue_handle_t Queue, void *DstPtr, void *SrcPtr,
}
return Result;
}
-ol_result_t olEnqueueDataReadWithCodeLoc(ol_queue_handle_t Queue, void *DstPtr,
- void *SrcPtr, size_t Size,
- ol_event_handle_t *EventOut,
- ol_code_location_t *CodeLocation) {
+ol_result_t olEnqueueMemcpyDtoHWithCodeLoc(ol_queue_handle_t Queue,
+ void *DstPtr, void *SrcPtr,
+ size_t Size,
+ ol_event_handle_t *EventOut,
+ ol_code_location_t *CodeLocation) {
currentCodeLocation() = CodeLocation;
- ol_result_t Result = olEnqueueDataRead(Queue, DstPtr, SrcPtr, Size, EventOut);
+ ol_result_t Result =
+ olEnqueueMemcpyDtoH(Queue, DstPtr, SrcPtr, Size, EventOut);
currentCodeLocation() = nullptr;
return Result;
}
///////////////////////////////////////////////////////////////////////////////
-ol_impl_result_t olEnqueueDataCopy_val(ol_queue_handle_t Queue,
- ol_device_handle_t DstDevice,
- void *DstPtr, void *SrcPtr, size_t Size,
- ol_event_handle_t *EventOut) {
+ol_impl_result_t olEnqueueMemcpyDtoD_val(ol_queue_handle_t Queue,
+ ol_device_handle_t DstDevice,
+ void *DstPtr, void *SrcPtr,
+ size_t Size,
+ ol_event_handle_t *EventOut) {
if (true /*enableParameterValidation*/) {
if (NULL == Queue) {
return OL_ERRC_INVALID_NULL_HANDLE;
@@ -925,22 +1036,22 @@ ol_impl_result_t olEnqueueDataCopy_val(ol_queue_handle_t Queue,
}
}
- return olEnqueueDataCopy_impl(Queue, DstDevice, DstPtr, SrcPtr, Size,
- EventOut);
+ return olEnqueueMemcpyDtoD_impl(Queue, DstDevice, DstPtr, SrcPtr, Size,
+ EventOut);
}
-OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopy(
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueMemcpyDtoD(
ol_queue_handle_t Queue, ol_device_handle_t DstDevice, void *DstPtr,
void *SrcPtr, size_t Size, ol_event_handle_t *EventOut) {
if (offloadConfig().TracingEnabled) {
- std::cerr << "---> olEnqueueDataCopy";
+ std::cerr << "---> olEnqueueMemcpyDtoD";
}
ol_result_t Result =
- olEnqueueDataCopy_val(Queue, DstDevice, DstPtr, SrcPtr, Size, EventOut);
+ olEnqueueMemcpyDtoD_val(Queue, DstDevice, DstPtr, SrcPtr, Size, EventOut);
if (offloadConfig().TracingEnabled) {
- ol_enqueue_data_copy_params_t Params = {&Queue, &DstDevice, &DstPtr,
- &SrcPtr, &Size, &EventOut};
+ ol_enqueue_memcpy_dto_d_params_t Params = {&Queue, &DstDevice, &DstPtr,
+ &SrcPtr, &Size, &EventOut};
std::cerr << "(" << &Params << ")";
std::cerr << "-> " << Result << "\n";
if (Result && Result->Details) {
@@ -949,15 +1060,15 @@ OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopy(
}
return Result;
}
-ol_result_t olEnqueueDataCopyWithCodeLoc(ol_queue_handle_t Queue,
- ol_device_handle_t DstDevice,
- void *DstPtr, void *SrcPtr,
- size_t Size,
- ol_event_handle_t *EventOut,
- ol_code_location_t *CodeLocation) {
+ol_result_t olEnqueueMemcpyDtoDWithCodeLoc(ol_queue_handle_t Queue,
+ ol_device_handle_t DstDevice,
+ void *DstPtr, void *SrcPtr,
+ size_t Size,
+ ol_event_handle_t *EventOut,
+ ol_code_location_t *CodeLocation) {
currentCodeLocation() = CodeLocation;
ol_result_t Result =
- olEnqueueDataCopy(Queue, DstDevice, DstPtr, SrcPtr, Size, EventOut);
+ olEnqueueMemcpyDtoD(Queue, DstDevice, DstPtr, SrcPtr, Size, EventOut);
currentCodeLocation() = nullptr;
return Result;
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index 05a8e47251254..6307d0a54b59e 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -20,6 +20,7 @@ OFFLOAD_FUNC(olGetDeviceCount)
OFFLOAD_FUNC(olGetDevice)
OFFLOAD_FUNC(olGetDeviceInfo)
OFFLOAD_FUNC(olGetDeviceInfoSize)
+OFFLOAD_FUNC(olGetHostDevice)
OFFLOAD_FUNC(olMemAlloc)
OFFLOAD_FUNC(olMemFree)
OFFLOAD_FUNC(olCreateQueue)
@@ -29,9 +30,10 @@ OFFLOAD_FUNC(olFinishQueue)
OFFLOAD_FUNC(olRetainEvent)
OFFLOAD_FUNC(olReleaseEvent)
OFFLOAD_FUNC(olWaitEvent)
-OFFLOAD_FUNC(olEnqueueDataWrite)
-OFFLOAD_FUNC(olEnqueueDataRead)
-OFFLOAD_FUNC(olEnqueueDataCopy)
+OFFLOAD_FUNC(olEnqueueMemcpy)
+OFFLOAD_FUNC(olEnqueueMemcpyHtoD)
+OFFLOAD_FUNC(olEnqueueMemcpyDtoH)
+OFFLOAD_FUNC(olEnqueueMemcpyDtoD)
OFFLOAD_FUNC(olEnqueueKernelLaunch)
OFFLOAD_FUNC(olCreateProgram)
OFFLOAD_FUNC(olRetainProgram)
@@ -51,6 +53,7 @@ OFFLOAD_FUNC(olGetDeviceCountWithCodeLoc)
OFFLOAD_FUNC(olGetDeviceWithCodeLoc)
OFFLOAD_FUNC(olGetDeviceInfoWithCodeLoc)
OFFLOAD_FUNC(olGetDeviceInfoSizeWithCodeLoc)
+OFFLOAD_FUNC(olGetHostDeviceWithCodeLoc)
OFFLOAD_FUNC(olMemAllocWithCodeLoc)
OFFLOAD_FUNC(olMemFreeWithCodeLoc)
OFFLOAD_FUNC(olCreateQueueWithCodeLoc)
@@ -60,9 +63,10 @@ OFFLOAD_FUNC(olFinishQueueWithCodeLoc)
OFFLOAD_FUNC(olRetainEventWithCodeLoc)
OFFLOAD_FUNC(olReleaseEventWithCodeLoc)
OFFLOAD_FUNC(olWaitEventWithCodeLoc)
-OFFLOAD_FUNC(olEnqueueDataWriteWithCodeLoc)
-OFFLOAD_FUNC(olEnqueueDataReadWithCodeLoc)
-OFFLOAD_FUNC(olEnqueueDataCopyWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueMemcpyWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueMemcpyHtoDWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueMemcpyDtoHWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueMemcpyDtoDWithCodeLoc)
OFFLOAD_FUNC(olEnqueueKernelLaunchWithCodeLoc)
OFFLOAD_FUNC(olCreateProgramWithCodeLoc)
OFFLOAD_FUNC(olRetainProgramWithCodeLoc)
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 976422f3d7fd5..1fb77e3d278c9 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -37,6 +37,8 @@ ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
ol_device_info_t PropName,
size_t *PropSizeRet);
+ol_impl_result_t olGetHostDevice_impl(ol_device_handle_t *Device);
+
ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
ol_alloc_type_t Type, size_t Size,
void **AllocationOut);
@@ -59,18 +61,25 @@ ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event);
ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event);
-ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *DstPtr,
- void *SrcPtr, size_t Size,
- ol_event_handle_t *EventOut);
-
-ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *DstPtr,
- void *SrcPtr, size_t Size,
- ol_event_handle_t *EventOut);
-
-ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue,
- ol_device_handle_t DstDevice,
- void *DstPtr, void *SrcPtr, size_t Size,
- ol_event_handle_t *EventOut);
+ol_impl_result_t olEnqueueMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
+ ol_device_handle_t DstDevice,
+ void *SrcPtr,
+ ol_device_handle_t SrcDevice, size_t Size,
+ ol_event_handle_t *EventOut);
+
+ol_impl_result_t olEnqueueMemcpyHtoD_impl(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
+ ol_event_handle_t *EventOut);
+
+ol_impl_result_t olEnqueueMemcpyDtoH_impl(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
+ ol_event_handle_t *EventOut);
+
+ol_impl_result_t olEnqueueMemcpyDtoD_impl(ol_queue_handle_t Queue,
+ ol_device_handle_t DstDevice,
+ void *DstPtr, void *SrcPtr,
+ size_t Size,
+ ol_event_handle_t *EventOut);
ol_impl_result_t
olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 10dc58d1079a0..a5616f85ea8a3 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -475,6 +475,13 @@ operator<<(std::ostream &os,
return os;
}
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_get_host_device_params_t *params) {
+ os << ".Device = ";
+ printPtr(os, *params->pDevice);
+ return os;
+}
+
inline std::ostream &operator<<(std::ostream &os,
const struct ol_mem_alloc_params_t *params) {
os << ".Device = ";
@@ -556,9 +563,34 @@ inline std::ostream &operator<<(std::ostream &os,
return os;
}
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_enqueue_memcpy_params_t *params) {
+ os << ".Queue = ";
+ printPtr(os, *params->pQueue);
+ os << ", ";
+ os << ".DstPtr = ";
+ printPtr(os, *params->pDstPtr);
+ os << ", ";
+ os << ".DstDevice = ";
+ printPtr(os, *params->pDstDevice);
+ os << ", ";
+ os << ".SrcPtr = ";
+ printPtr(os, *params->pSrcPtr);
+ os << ", ";
+ os << ".SrcDevice = ";
+ printPtr(os, *params->pSrcDevice);
+ os << ", ";
+ os << ".Size = ";
+ os << *params->pSize;
+ os << ", ";
+ os << ".EventOut = ";
+ printPtr(os, *params->pEventOut);
+ return os;
+}
+
inline std::ostream &
operator<<(std::ostream &os,
- const struct ol_enqueue_data_write_params_t *params) {
+ const struct ol_enqueue_memcpy_hto_d_params_t *params) {
os << ".Queue = ";
printPtr(os, *params->pQueue);
os << ", ";
@@ -578,7 +610,7 @@ operator<<(std::ostream &os,
inline std::ostream &
operator<<(std::ostream &os,
- const struct ol_enqueue_data_read_params_t *params) {
+ const struct ol_enqueue_memcpy_dto_h_params_t *params) {
os << ".Queue = ";
printPtr(os, *params->pQueue);
os << ", ";
@@ -598,7 +630,7 @@ operator<<(std::ostream &os,
inline std::ostream &
operator<<(std::ostream &os,
- const struct ol_enqueue_data_copy_params_t *params) {
+ const struct ol_enqueue_memcpy_dto_d_params_t *params) {
os << ".Queue = ";
printPtr(os, *params->pQueue);
os << ", ";
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index d395eb34a5f4a..63d5bdb1e8f61 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -27,7 +27,7 @@ using namespace llvm::omp::target::plugin;
// interface.
struct ol_device_impl_t {
int DeviceNum;
- GenericDeviceTy &Device;
+ GenericDeviceTy *Device;
ol_platform_handle_t Platform;
};
@@ -45,7 +45,6 @@ struct ol_queue_impl_t {
struct ol_event_impl_t {
void *EventInfo;
ol_queue_handle_t Queue;
- ol_device_handle_t Device;
std::atomic_uint32_t RefCount;
};
@@ -107,6 +106,11 @@ PlatformVecT &Platforms() {
return Platforms;
}
+ol_device_handle_t HostDevice() {
+ static ol_device_impl_t HostDeviceImpl{-1, nullptr, nullptr};
+ return &HostDeviceImpl;
+}
+
// TODO: Some plugins expect to be linked into libomptarget which defines these
// symbols to implement ompt callbacks. The least invasive workaround here is to
// define them in libLLVMOffload as false/null so they are never used. In future
@@ -144,7 +148,7 @@ void initPlugins() {
DevNum++) {
if (Platform.Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) {
Platform.Devices.emplace_back(ol_device_impl_t{
- DevNum, Platform.Plugin->getDevice(DevNum), &Platform});
+ DevNum, &Platform.Plugin->getDevice(DevNum), &Platform});
}
}
}
@@ -260,7 +264,7 @@ ol_impl_result_t olGetDeviceInfoImplDetail(ol_device_handle_t Device,
ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet);
InfoQueueTy DevInfo;
- if (auto Err = Device->Device.obtainInfoImpl(DevInfo))
+ if (auto Err = Device->Device->obtainInfoImpl(DevInfo))
return OL_ERRC_OUT_OF_RESOURCES;
// Find the info if it exists under any of the given names
@@ -312,6 +316,11 @@ ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet);
}
+ol_impl_result_t olGetHostDevice_impl(ol_device_handle_t *Device) {
+ *Device = HostDevice();
+ return OL_SUCCESS;
+}
+
TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) {
switch (Type) {
case OL_ALLOC_TYPE_DEVICE:
@@ -328,7 +337,7 @@ ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
ol_alloc_type_t Type, size_t Size,
void **AllocationOut) {
auto Alloc =
- Device->Device.dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
+ Device->Device->dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
if (!Alloc)
return {OL_ERRC_OUT_OF_RESOURCES,
formatv("Could not create allocation on device {0}", Device).str()};
@@ -339,7 +348,8 @@ ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
void *Address) {
- auto Res = Device->Device.dataDelete(Address, convertOlToPluginAllocTy(Type));
+ auto Res =
+ Device->Device->dataDelete(Address, convertOlToPluginAllocTy(Type));
if (Res)
return {OL_ERRC_OUT_OF_RESOURCES, "Could not free allocation"};
@@ -349,7 +359,7 @@ ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
ol_queue_handle_t *Queue) {
auto CreatedQueue = std::make_unique<ol_queue_impl_t>();
- auto Err = Device->Device.initAsyncInfo(&(CreatedQueue->AsyncInfo));
+ auto Err = Device->Device->initAsyncInfo(&(CreatedQueue->AsyncInfo));
if (Err)
return {OL_ERRC_UNKNOWN, "Could not initialize stream resource"};
@@ -375,7 +385,7 @@ ol_impl_result_t olFinishQueue_impl(ol_queue_handle_t Queue) {
// Host plugin doesn't have a queue set so it's not safe to call synchronize
// on it, but we have nothing to synchronize in that situation anyway.
if (Queue->AsyncInfo->Queue) {
- auto Err = Queue->Device->Device.synchronize(Queue->AsyncInfo);
+ auto Err = Queue->Device->Device->synchronize(Queue->AsyncInfo);
if (Err)
return {OL_ERRC_INVALID_QUEUE, "The queue failed to synchronize"};
}
@@ -383,7 +393,7 @@ ol_impl_result_t olFinishQueue_impl(ol_queue_handle_t Queue) {
// Recreate the stream resource so the queue can be reused
// TODO: Would be easier for the synchronization to (optionally) not release
// it to begin with.
- auto Res = Queue->Device->Device.initAsyncInfo(&Queue->AsyncInfo);
+ auto Res = Queue->Device->Device->initAsyncInfo(&Queue->AsyncInfo);
if (Res)
return {OL_ERRC_UNKNOWN, "Could not reinitialize the stream resource"};
@@ -391,7 +401,7 @@ ol_impl_result_t olFinishQueue_impl(ol_queue_handle_t Queue) {
}
ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event) {
- auto Res = Event->Device->Device.syncEvent(Event->EventInfo);
+ auto Res = Event->Queue->Device->Device->syncEvent(Event->EventInfo);
if (Res)
return {OL_ERRC_INVALID_EVENT, "The event failed to synchronize"};
@@ -413,24 +423,59 @@ ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event) {
ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
auto EventImpl = std::make_unique<ol_event_impl_t>();
EventImpl->Queue = Queue;
- auto Res = Queue->Device->Device.createEvent(&EventImpl->EventInfo);
+ auto Res = Queue->Device->Device->createEvent(&EventImpl->EventInfo);
if (Res)
return nullptr;
- Res =
- Queue->Device->Device.recordEvent(EventImpl->EventInfo, Queue->AsyncInfo);
+ Res = Queue->Device->Device->recordEvent(EventImpl->EventInfo,
+ Queue->AsyncInfo);
if (Res)
return nullptr;
return EventImpl.release();
}
-ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *DstPtr,
- void *SrcPtr, size_t Size,
- ol_event_handle_t *EventOut) {
- auto &DeviceImpl = Queue->Device->Device;
+ol_impl_result_t olEnqueueMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
+ ol_device_handle_t DstDevice,
+ void *SrcPtr,
+ ol_device_handle_t SrcDevice, size_t Size,
+ ol_event_handle_t *EventOut) {
+ if (DstDevice == HostDevice() && SrcDevice == HostDevice()) {
+ // TODO: We could actually handle this with a plain memcpy but we currently
+ // have no way of synchronizing this with the queue
+ return {OL_ERRC_INVALID_ARGUMENT,
+ "One of DstDevice and SrcDevice must be a non-host device"};
+ }
+
+ if (DstDevice == HostDevice()) {
+ auto Res =
+ SrcDevice->Device->dataRetrieve(DstPtr, SrcPtr, Size, Queue->AsyncInfo);
+ if (Res)
+ return {OL_ERRC_UNKNOWN, "The data retrieve operation failed"};
+ } else if (SrcDevice == HostDevice()) {
+ auto Res =
+ DstDevice->Device->dataSubmit(DstPtr, SrcPtr, Size, Queue->AsyncInfo);
+ if (Res)
+ return {OL_ERRC_UNKNOWN, "The data submit operation failed"};
+ } else {
+ auto Res = SrcDevice->Device->dataExchange(SrcPtr, *DstDevice->Device,
+ DstPtr, Size, Queue->AsyncInfo);
+ if (Res)
+ return {OL_ERRC_UNKNOWN, "The data exchange operation failed"};
+ }
+
+ if (EventOut)
+ *EventOut = makeEvent(Queue);
+
+ return OL_SUCCESS;
+}
+
+ol_impl_result_t olEnqueueMemcpyHtoD_impl(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
+ ol_event_handle_t *EventOut) {
+ auto *DeviceImpl = Queue->Device->Device;
- auto Res = DeviceImpl.dataSubmit(DstPtr, SrcPtr, Size, Queue->AsyncInfo);
+ auto Res = DeviceImpl->dataSubmit(DstPtr, SrcPtr, Size, Queue->AsyncInfo);
if (Res)
return {OL_ERRC_UNKNOWN, "The data submit operation failed"};
@@ -441,12 +486,12 @@ ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *DstPtr,
return OL_SUCCESS;
}
-ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *DstPtr,
- void *SrcPtr, size_t Size,
- ol_event_handle_t *EventOut) {
- auto &DeviceImpl = Queue->Device->Device;
+ol_impl_result_t olEnqueueMemcpyDtoH_impl(ol_queue_handle_t Queue, void *DstPtr,
+ void *SrcPtr, size_t Size,
+ ol_event_handle_t *EventOut) {
+ auto *DeviceImpl = Queue->Device->Device;
- auto Res = DeviceImpl.dataRetrieve(DstPtr, SrcPtr, Size, Queue->AsyncInfo);
+ auto Res = DeviceImpl->dataRetrieve(DstPtr, SrcPtr, Size, Queue->AsyncInfo);
if (Res)
return {OL_ERRC_UNKNOWN, "The data retrieve operation failed"};
@@ -457,14 +502,15 @@ ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *DstPtr,
return OL_SUCCESS;
}
-ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue,
- ol_device_handle_t DstDevice,
- void *DstPtr, void *SrcPtr, size_t Size,
- ol_event_handle_t *EventOut) {
- auto &DeviceImpl = Queue->Device->Device;
+ol_impl_result_t olEnqueueMemcpyDtoD_impl(ol_queue_handle_t Queue,
+ ol_device_handle_t DstDevice,
+ void *DstPtr, void *SrcPtr,
+ size_t Size,
+ ol_event_handle_t *EventOut) {
+ auto *DeviceImpl = Queue->Device->Device;
- auto Res = DeviceImpl.dataExchange(SrcPtr, DstDevice->Device, DstPtr, Size,
- Queue->AsyncInfo);
+ auto Res = DeviceImpl->dataExchange(SrcPtr, *DstDevice->Device, DstPtr, Size,
+ Queue->AsyncInfo);
if (Res)
return {OL_ERRC_UNKNOWN, "The data exchange operation failed"};
@@ -489,7 +535,7 @@ ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
ol_program_handle_t Prog = new ol_program_impl_t();
- auto Res = Device->Device.loadBinary(Device->Device.Plugin, &DeviceImage);
+ auto Res = Device->Device->loadBinary(Device->Device->Plugin, &DeviceImage);
if (!Res)
return OL_ERRC_INVALID_VALUE;
@@ -559,9 +605,9 @@ ol_impl_result_t
olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
const ol_kernel_launch_size_args_t *LaunchSizeArgs,
ol_event_handle_t *EventOut) {
- auto &DeviceImpl = Queue->Device->Device;
+ auto *DeviceImpl = Queue->Device->Device;
- AsyncInfoWrapperTy AsyncInfoWrapper(DeviceImpl, Queue->AsyncInfo);
+ AsyncInfoWrapperTy AsyncInfoWrapper(*DeviceImpl, Queue->AsyncInfo);
KernelArgsTy LaunchArgs{};
LaunchArgs.NumArgs = Kernel->Args.getPointers().size();
@@ -578,7 +624,7 @@ olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
// No offsets needed, arguments are real pointers
auto ArgOffsets = std::vector<ptrdiff_t>(LaunchArgs.NumArgs, 0ul);
- auto Err = Kernel->KernelImpl->launch(DeviceImpl, LaunchArgs.ArgPtrs,
+ auto Err = Kernel->KernelImpl->launch(*DeviceImpl, LaunchArgs.ArgPtrs,
ArgOffsets.data(), LaunchArgs,
AsyncInfoWrapper);
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index c7f28d147db14..d942d244beadd 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -20,6 +20,7 @@ add_libompt_unittest("offload.unittests"
${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueDataWrite.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueDataRead.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueDataCopy.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/enqueue/olEnqueueMemcpy.cpp
)
add_dependencies("offload.unittests" ${PLUGINS_TEST_COMMON})
target_link_libraries("offload.unittests" PRIVATE ${PLUGINS_TEST_COMMON})
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
index dc2791266fa14..84aa88009fce8 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataCopy.cpp
@@ -1,4 +1,4 @@
-//===------- Offload API tests - olEnqueueDataCopy ------------------------===//
+//===------- Offload API tests - olEnqueueMemcpyDtoD ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,9 +10,9 @@
#include <OffloadAPI.h>
#include <gtest/gtest.h>
-using olEnqueueDataCopyTest = offloadQueueTest;
+using olEnqueueMemcpyDtoDTest = offloadQueueTest;
-TEST_F(olEnqueueDataCopyTest, Success) {
+TEST_F(olEnqueueMemcpyDtoDTest, Success) {
constexpr size_t Size = 1024;
void *AllocA;
void *AllocB;
@@ -22,11 +22,11 @@ TEST_F(olEnqueueDataCopyTest, Success) {
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocA));
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocB));
ASSERT_SUCCESS(
- olEnqueueDataWrite(Queue, AllocA, Input.data(), Size, nullptr));
+ olEnqueueMemcpyHtoD(Queue, AllocA, Input.data(), Size, nullptr));
ASSERT_SUCCESS(
- olEnqueueDataCopy(Queue, Device, AllocB, AllocA, Size, nullptr));
+ olEnqueueMemcpyDtoD(Queue, Device, AllocB, AllocA, Size, nullptr));
ASSERT_SUCCESS(
- olEnqueueDataRead(Queue, Output.data(), AllocB, Size, nullptr));
+ olEnqueueMemcpyDtoH(Queue, Output.data(), AllocB, Size, nullptr));
ASSERT_SUCCESS(olFinishQueue(Queue));
for (uint8_t Val : Output) {
ASSERT_EQ(Val, 42);
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
index 71323e4b44817..d9e2be4146934 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataRead.cpp
@@ -1,4 +1,4 @@
-//===------- Offload API tests - olEnqueueDataRead ------------------------===//
+//===------- Offload API tests - olEnqueueMemcpyDtoH ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,17 +10,19 @@
#include <OffloadAPI.h>
#include <gtest/gtest.h>
-using olEnqueueDataReadTest = offloadQueueTest;
+using olEnqueueMemcpyDtoHTest = offloadQueueTest;
-TEST_F(olEnqueueDataReadTest, Success) {
+TEST_F(olEnqueueMemcpyDtoHTest, Success) {
constexpr size_t Size = 1024;
void *Alloc;
std::vector<uint8_t> Input(Size, 42);
std::vector<uint8_t> Output(Size, 0);
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
- ASSERT_SUCCESS(olEnqueueDataWrite(Queue, Alloc, Input.data(), Size, nullptr));
- ASSERT_SUCCESS(olEnqueueDataRead(Queue, Output.data(), Alloc, Size, nullptr));
+ ASSERT_SUCCESS(
+ olEnqueueMemcpyHtoD(Queue, Alloc, Input.data(), Size, nullptr));
+ ASSERT_SUCCESS(
+ olEnqueueMemcpyDtoH(Queue, Output.data(), Alloc, Size, nullptr));
ASSERT_SUCCESS(olFinishQueue(Queue));
for (uint8_t Val : Output) {
ASSERT_EQ(Val, 42);
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
index f60d501a9918d..81d338abcbd8c 100644
--- a/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueDataWrite.cpp
@@ -1,4 +1,4 @@
-//===------- Offload API tests - olEnqueueDataWrite -----------------------===//
+//===------- Offload API tests - olEnqueueMemcpyHtoD ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,14 +10,15 @@
#include <OffloadAPI.h>
#include <gtest/gtest.h>
-using olEnqueueDataWriteTest = offloadQueueTest;
+using olEnqueueMemcpyHtoDTest = offloadQueueTest;
-TEST_F(olEnqueueDataWriteTest, Success) {
+TEST_F(olEnqueueMemcpyHtoDTest, Success) {
constexpr size_t Size = 1024;
void *Alloc;
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
std::vector<uint8_t> Input(Size, 42);
- ASSERT_SUCCESS(olEnqueueDataWrite(Queue, Alloc, Input.data(), Size, nullptr));
+ ASSERT_SUCCESS(
+ olEnqueueMemcpyHtoD(Queue, Alloc, Input.data(), Size, nullptr));
olFinishQueue(Queue);
olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc);
}
diff --git a/offload/unittests/OffloadAPI/enqueue/olEnqueueMemcpy.cpp b/offload/unittests/OffloadAPI/enqueue/olEnqueueMemcpy.cpp
new file mode 100644
index 0000000000000..13be31fc8d801
--- /dev/null
+++ b/offload/unittests/OffloadAPI/enqueue/olEnqueueMemcpy.cpp
@@ -0,0 +1,71 @@
+//===------- Offload API tests - olEnqueueMemcpy --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olEnqueueMemcpyTest = offloadQueueTest;
+
+TEST_F(olEnqueueMemcpyTest, SuccessH2D) {
+ constexpr size_t Size = 1024;
+ void *Alloc;
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
+ std::vector<uint8_t> Input(Size, 42);
+ ol_device_handle_t Host;
+ ASSERT_SUCCESS(olGetHostDevice(&Host));
+ ASSERT_SUCCESS(
+ olEnqueueMemcpy(Queue, Alloc, Device, Input.data(), Host, Size, nullptr));
+ olFinishQueue(Queue);
+ olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc);
+}
+
+TEST_F(olEnqueueMemcpyTest, SuccessDtoH) {
+ constexpr size_t Size = 1024;
+ void *Alloc;
+ std::vector<uint8_t> Input(Size, 42);
+ std::vector<uint8_t> Output(Size, 0);
+ ol_device_handle_t Host;
+ ASSERT_SUCCESS(olGetHostDevice(&Host));
+
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
+ ASSERT_SUCCESS(
+ olEnqueueMemcpy(Queue, Alloc, Device, Input.data(), Host, Size, nullptr));
+ ASSERT_SUCCESS(olEnqueueMemcpy(Queue, Output.data(), Host, Alloc, Device,
+ Size, nullptr));
+ ASSERT_SUCCESS(olFinishQueue(Queue));
+ for (uint8_t Val : Output) {
+ ASSERT_EQ(Val, 42);
+ }
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, Alloc));
+}
+
+TEST_F(olEnqueueMemcpyTest, SuccessDtoD) {
+ constexpr size_t Size = 1024;
+ void *AllocA;
+ void *AllocB;
+ std::vector<uint8_t> Input(Size, 42);
+ std::vector<uint8_t> Output(Size, 0);
+ ol_device_handle_t Host;
+ ASSERT_SUCCESS(olGetHostDevice(&Host));
+
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocA));
+ ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocB));
+ ASSERT_SUCCESS(olEnqueueMemcpy(Queue, AllocA, Device, Input.data(), Host,
+ Size, nullptr));
+ ASSERT_SUCCESS(
+ olEnqueueMemcpy(Queue, AllocB, Device, AllocA, Device, Size, nullptr));
+ ASSERT_SUCCESS(olEnqueueMemcpy(Queue, Output.data(), Host, AllocB, Device,
+ Size, nullptr));
+ ASSERT_SUCCESS(olFinishQueue(Queue));
+ for (uint8_t Val : Output) {
+ ASSERT_EQ(Val, 42);
+ }
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, AllocA));
+ ASSERT_SUCCESS(olMemFree(Device, OL_ALLOC_TYPE_DEVICE, AllocB));
+}
More information about the llvm-commits
mailing list