[llvm] Draft: Implement the remaining initial Offload API (PR #122106)

Callum Fare via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 8 05:13:02 PST 2025


https://github.com/callumfare created https://github.com/llvm/llvm-project/pull/122106

Implement the complete initial version of the Offload API, to the extent that it is usable for simple offloading programs. Tested with a basic SYCL program.

TODO:
- Remaining kernel launch parameters, passed via struct.
- Support for global variables

>From 7cbe788ddc0de682ce0f939caf4619e99889f992 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Wed, 11 Dec 2024 12:08:44 +0000
Subject: [PATCH 1/3] WIP: Implement olMemAlloc, olMemFree

---
 offload/liboffload/API/Memory.td              | 45 +++++++++
 offload/liboffload/API/OffloadAPI.td          |  1 +
 .../liboffload/include/generated/OffloadAPI.h | 95 +++++++++++++++++++
 .../include/generated/OffloadEntryPoints.inc  | 93 ++++++++++++++++++
 .../include/generated/OffloadFuncs.inc        |  4 +
 .../generated/OffloadImplFuncDecls.inc        |  7 ++
 .../include/generated/OffloadPrint.hpp        | 53 +++++++++++
 offload/liboffload/src/OffloadImpl.cpp        | 35 +++++++
 8 files changed, 333 insertions(+)
 create mode 100644 offload/liboffload/API/Memory.td

diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td
new file mode 100644
index 00000000000000..8cfaf70311e346
--- /dev/null
+++ b/offload/liboffload/API/Memory.td
@@ -0,0 +1,45 @@
+//===-- Memory.td - Memory definitions for Offload ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to memory allocations
+//
+//===----------------------------------------------------------------------===//
+
+def : Enum {
+  let name = "ol_alloc_type_t";
+  let desc = "Represents the type of allocation made with olMemAlloc";
+  let etors = [
+    Etor<"HOST", "Host allocation">,
+    Etor<"DEVICE", "Device allocation">,
+    Etor<"SHARED", "Shared allocation">
+  ];
+}
+
+def : Function {
+  let name = "olMemAlloc";
+  let desc = "Creates a memory allocation on the specified device";
+  let params = [
+    Param<"ol_device_handle_t", "Device", "handle of the device to allocate on", PARAM_IN>,
+    Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>,
+    Param<"size_t", "Size", "size of the allocation in bytes", PARAM_IN>,
+    Param<"size_t", "Aligment", "alignment of the allocation in bytes", PARAM_IN>,
+    Param<"void**", "AllocationOut", "output for the allocated pointer", PARAM_OUT>
+  ];
+  let returns = [];
+}
+
+def : Function {
+  let name = "olMemFree";
+  let desc = "Frees a memory allocation previously made by olMemAlloc";
+  let params = [
+    Param<"ol_device_handle_t", "Device", "handle of the device to allocate on", PARAM_IN>,
+    Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>,
+    Param<"void*", "Address", "address of the allocation to free", PARAM_IN>,
+  ];
+  let returns = [];
+}
diff --git a/offload/liboffload/API/OffloadAPI.td b/offload/liboffload/API/OffloadAPI.td
index 8a0c3c40581223..a609cc7ac80b41 100644
--- a/offload/liboffload/API/OffloadAPI.td
+++ b/offload/liboffload/API/OffloadAPI.td
@@ -13,3 +13,4 @@ include "APIDefs.td"
 include "Common.td"
 include "Platform.td"
 include "Device.td"
+include "Memory.td"
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 11fcc96625ab8d..81f3a8e0201bad 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -460,6 +460,67 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSize(
     // [out] pointer to the number of bytes required to store the query
     size_t *PropSizeRet);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Represents the type of allocation made with olMemAlloc
+typedef enum ol_alloc_type_t {
+  /// Host allocation
+  OL_ALLOC_TYPE_HOST = 0,
+  /// Device allocation
+  OL_ALLOC_TYPE_DEVICE = 1,
+  /// Shared allocation
+  OL_ALLOC_TYPE_SHARED = 2,
+  /// @cond
+  OL_ALLOC_TYPE_FORCE_UINT32 = 0x7fffffff
+  /// @endcond
+
+} ol_alloc_type_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Creates a memory allocation on the specified device
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Device`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == AllocationOut`
+OL_APIEXPORT ol_result_t OL_APICALL olMemAlloc(
+    // [in] handle of the device to allocate on
+    ol_device_handle_t Device,
+    // [in] type of the allocation
+    ol_alloc_type_t Type,
+    // [in] size of the allocation in bytes
+    size_t Size,
+    // [in] alignment of the allocation in bytes
+    size_t Aligment,
+    // [out] output for the allocated pointer
+    void **AllocationOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Frees a memory allocation previously made by olMemAlloc
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Device`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == Address`
+OL_APIEXPORT ol_result_t OL_APICALL olMemFree(
+    // [in] handle of the device to allocate on
+    ol_device_handle_t Device,
+    // [in] type of the allocation
+    ol_alloc_type_t Type,
+    // [in] address of the allocation to free
+    void *Address);
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olGetPlatform
 /// @details Each entry is a pointer to the parameter passed to the function;
@@ -530,6 +591,26 @@ typedef struct ol_get_device_info_size_params_t {
   size_t **pPropSizeRet;
 } ol_get_device_info_size_params_t;
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olMemAlloc
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_mem_alloc_params_t {
+  ol_device_handle_t *pDevice;
+  ol_alloc_type_t *pType;
+  size_t *pSize;
+  size_t *pAligment;
+  void ***pAllocationOut;
+} ol_mem_alloc_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olMemFree
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_mem_free_params_t {
+  ol_device_handle_t *pDevice;
+  ol_alloc_type_t *pType;
+  void **pAddress;
+} ol_mem_free_params_t;
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Variant of olInit that also sets source code location information
 /// @details See also ::olInit
@@ -605,6 +686,20 @@ OL_APIEXPORT ol_result_t OL_APICALL olGetDeviceInfoSizeWithCodeLoc(
     ol_device_handle_t Device, ol_device_info_t PropName, size_t *PropSizeRet,
     ol_code_location_t *CodeLocation);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olMemAlloc that also sets source code location information
+/// @details See also ::olMemAlloc
+OL_APIEXPORT ol_result_t OL_APICALL olMemAllocWithCodeLoc(
+    ol_device_handle_t Device, ol_alloc_type_t Type, size_t Size,
+    size_t Aligment, void **AllocationOut, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olMemFree that also sets source code location information
+/// @details See also ::olMemFree
+OL_APIEXPORT ol_result_t OL_APICALL
+olMemFreeWithCodeLoc(ol_device_handle_t Device, ol_alloc_type_t Type,
+                     void *Address, ol_code_location_t *CodeLocation);
+
 #if defined(__cplusplus)
 } // extern "C"
 #endif
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 49c1c8169615e5..08060dae80f035 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -439,3 +439,96 @@ ol_result_t olGetDeviceInfoSizeWithCodeLoc(ol_device_handle_t Device,
   currentCodeLocation() = nullptr;
   return Result;
 }
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olMemAlloc_val(ol_device_handle_t Device, ol_alloc_type_t Type,
+                                size_t Size, size_t Aligment,
+                                void **AllocationOut) {
+  if (true /*enableParameterValidation*/) {
+    if (NULL == Device) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == AllocationOut) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return olMemAlloc_impl(Device, Type, Size, Aligment, AllocationOut);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olMemAlloc(ol_device_handle_t Device,
+                                               ol_alloc_type_t Type,
+                                               size_t Size, size_t Aligment,
+                                               void **AllocationOut) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olMemAlloc";
+  }
+
+  ol_result_t Result =
+      olMemAlloc_val(Device, Type, Size, Aligment, AllocationOut);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_mem_alloc_params_t Params = {&Device, &Type, &Size, &Aligment,
+                                    &AllocationOut};
+    std::cout << "(" << &Params << ")";
+    std::cout << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      std::cout << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olMemAllocWithCodeLoc(ol_device_handle_t Device,
+                                  ol_alloc_type_t Type, size_t Size,
+                                  size_t Aligment, void **AllocationOut,
+                                  ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = olMemAlloc(Device, Type, Size, Aligment, AllocationOut);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+ol_impl_result_t olMemFree_val(ol_device_handle_t Device, ol_alloc_type_t Type,
+                               void *Address) {
+  if (true /*enableParameterValidation*/) {
+    if (NULL == Device) {
+      return OL_ERRC_INVALID_NULL_HANDLE;
+    }
+
+    if (NULL == Address) {
+      return OL_ERRC_INVALID_NULL_POINTER;
+    }
+  }
+
+  return olMemFree_impl(Device, Type, Address);
+}
+OL_APIEXPORT ol_result_t OL_APICALL olMemFree(ol_device_handle_t Device,
+                                              ol_alloc_type_t Type,
+                                              void *Address) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olMemFree";
+  }
+
+  ol_result_t Result = olMemFree_val(Device, Type, Address);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_mem_free_params_t Params = {&Device, &Type, &Address};
+    std::cout << "(" << &Params << ")";
+    std::cout << "-> " << Result << "\n";
+    if (Result && Result->Details) {
+      std::cout << "     *Error Details* " << Result->Details << " \n";
+    }
+  }
+  return Result;
+}
+ol_result_t olMemFreeWithCodeLoc(ol_device_handle_t Device,
+                                 ol_alloc_type_t Type, void *Address,
+                                 ol_code_location_t *CodeLocation) {
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Result = olMemFree(Device, Type, Address);
+
+  currentCodeLocation() = nullptr;
+  return Result;
+}
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index 48115493c790f4..26120f18279dcc 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -20,6 +20,8 @@ OFFLOAD_FUNC(olGetDeviceCount)
 OFFLOAD_FUNC(olGetDevice)
 OFFLOAD_FUNC(olGetDeviceInfo)
 OFFLOAD_FUNC(olGetDeviceInfoSize)
+OFFLOAD_FUNC(olMemAlloc)
+OFFLOAD_FUNC(olMemFree)
 OFFLOAD_FUNC(olInitWithCodeLoc)
 OFFLOAD_FUNC(olShutDownWithCodeLoc)
 OFFLOAD_FUNC(olGetPlatformWithCodeLoc)
@@ -30,5 +32,7 @@ OFFLOAD_FUNC(olGetDeviceCountWithCodeLoc)
 OFFLOAD_FUNC(olGetDeviceWithCodeLoc)
 OFFLOAD_FUNC(olGetDeviceInfoWithCodeLoc)
 OFFLOAD_FUNC(olGetDeviceInfoSizeWithCodeLoc)
+OFFLOAD_FUNC(olMemAllocWithCodeLoc)
+OFFLOAD_FUNC(olMemFreeWithCodeLoc)
 
 #undef OFFLOAD_FUNC
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index 5b26b2653a05d9..f0a96081fd2431 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -36,3 +36,10 @@ ol_impl_result_t olGetDeviceInfo_impl(ol_device_handle_t Device,
 ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
                                           ol_device_info_t PropName,
                                           size_t *PropSizeRet);
+
+ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
+                                 ol_alloc_type_t Type, size_t Size,
+                                 size_t Aligment, void **AllocationOut);
+
+ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
+                                void *Address);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index 8981bb054a4cb1..cff754237568e6 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -31,6 +31,7 @@ inline std::ostream &operator<<(std::ostream &os,
                                 enum ol_platform_backend_t value);
 inline std::ostream &operator<<(std::ostream &os, enum ol_device_type_t value);
 inline std::ostream &operator<<(std::ostream &os, enum ol_device_info_t value);
+inline std::ostream &operator<<(std::ostream &os, enum ol_alloc_type_t value);
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the ol_errc_t type
@@ -274,6 +275,26 @@ inline void printTagged(std::ostream &os, const void *ptr,
     break;
   }
 }
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print operator for the ol_alloc_type_t type
+/// @returns std::ostream &
+inline std::ostream &operator<<(std::ostream &os, enum ol_alloc_type_t value) {
+  switch (value) {
+  case OL_ALLOC_TYPE_HOST:
+    os << "OL_ALLOC_TYPE_HOST";
+    break;
+  case OL_ALLOC_TYPE_DEVICE:
+    os << "OL_ALLOC_TYPE_DEVICE";
+    break;
+  case OL_ALLOC_TYPE_SHARED:
+    os << "OL_ALLOC_TYPE_SHARED";
+    break;
+  default:
+    os << "unknown enumerator";
+    break;
+  }
+  return os;
+}
 
 inline std::ostream &operator<<(std::ostream &os,
                                 const ol_error_struct_t *Err) {
@@ -402,6 +423,38 @@ operator<<(std::ostream &os,
   return os;
 }
 
+inline std::ostream &operator<<(std::ostream &os,
+                                const struct ol_mem_alloc_params_t *params) {
+  os << ".Device = ";
+  printPtr(os, *params->pDevice);
+  os << ", ";
+  os << ".Type = ";
+  os << *params->pType;
+  os << ", ";
+  os << ".Size = ";
+  os << *params->pSize;
+  os << ", ";
+  os << ".Aligment = ";
+  os << *params->pAligment;
+  os << ", ";
+  os << ".AllocationOut = ";
+  printPtr(os, *params->pAllocationOut);
+  return os;
+}
+
+inline std::ostream &operator<<(std::ostream &os,
+                                const struct ol_mem_free_params_t *params) {
+  os << ".Device = ";
+  printPtr(os, *params->pDevice);
+  os << ", ";
+  os << ".Type = ";
+  os << *params->pType;
+  os << ", ";
+  os << ".Address = ";
+  printPtr(os, *params->pAddress);
+  return os;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // @brief Print pointer value
 template <typename T>
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 457f1053f16341..3e609ed03917f4 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -245,3 +245,38 @@ ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
                                           size_t *PropSizeRet) {
   return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet);
 }
+
+TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) {
+  switch (Type) {
+  case OL_ALLOC_TYPE_DEVICE:
+    return TARGET_ALLOC_DEVICE;
+  case OL_ALLOC_TYPE_HOST:
+    return TARGET_ALLOC_HOST;
+  case OL_ALLOC_TYPE_SHARED:
+  default:
+    return TARGET_ALLOC_SHARED;
+  }
+}
+
+ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
+                                 ol_alloc_type_t Type, size_t Size, size_t,
+                                 void **AllocationOut) {
+  auto Alloc =
+      Device->Device.dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
+  if (!Alloc) {
+    return {OL_ERRC_OUT_OF_RESOURCES,
+            formatv("Could not create allocation on device {0}", Device).str()};
+  }
+
+  *AllocationOut = *Alloc;
+  return OL_SUCCESS;
+}
+
+ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
+                                void *Address) {
+  auto Res = Device->Device.dataDelete(Address, convertOlToPluginAllocTy(Type));
+  if (Res) {
+    return {OL_ERRC_OUT_OF_RESOURCES, "Could not free allocation"};
+  }
+  return OL_SUCCESS;
+}

>From 73ed36a366dec72b63dccdc24d240e0efc0bf528 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Wed, 11 Dec 2024 12:13:29 +0000
Subject: [PATCH 2/3] Add size check

---
 offload/liboffload/API/Memory.td                            | 6 +++++-
 offload/liboffload/include/generated/OffloadAPI.h           | 2 ++
 offload/liboffload/include/generated/OffloadEntryPoints.inc | 4 ++++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td
index 8cfaf70311e346..c15ae6f6d21ca2 100644
--- a/offload/liboffload/API/Memory.td
+++ b/offload/liboffload/API/Memory.td
@@ -30,7 +30,11 @@ def : Function {
     Param<"size_t", "Aligment", "alignment of the allocation in bytes", PARAM_IN>,
     Param<"void**", "AllocationOut", "output for the allocated pointer", PARAM_OUT>
   ];
-  let returns = [];
+  let returns = [
+    Return<"OL_ERRC_INVALID_SIZE", [
+      "`Size == 0`"
+    ]>
+  ];
 }
 
 def : Function {
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 81f3a8e0201bad..4c3356645e55aa 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -484,6 +484,8 @@ typedef enum ol_alloc_type_t {
 ///     - ::OL_RESULT_SUCCESS
 ///     - ::OL_ERRC_UNINITIALIZED
 ///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_SIZE
+///         + `Size == 0`
 ///     - ::OL_ERRC_INVALID_NULL_HANDLE
 ///         + `NULL == Device`
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 08060dae80f035..bcde65452b265f 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -445,6 +445,10 @@ ol_impl_result_t olMemAlloc_val(ol_device_handle_t Device, ol_alloc_type_t Type,
                                 size_t Size, size_t Aligment,
                                 void **AllocationOut) {
   if (true /*enableParameterValidation*/) {
+    if (Size == 0) {
+      return OL_ERRC_INVALID_SIZE;
+    }
+
     if (NULL == Device) {
       return OL_ERRC_INVALID_NULL_HANDLE;
     }

>From be5c36bd2b23fc9eb7886586d8687bde4de145e0 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum at codeplay.com>
Date: Mon, 6 Jan 2025 15:22:52 +0000
Subject: [PATCH 3/3] Implement minimum Offload API needed to launch a SYCL
 kernel

---
 offload/liboffload/API/Common.td              |  20 +
 offload/liboffload/API/Enqueue.td             |  68 ++
 offload/liboffload/API/Event.td               |  41 +
 offload/liboffload/API/Kernel.td              |  44 +
 offload/liboffload/API/OffloadAPI.td          |   5 +
 offload/liboffload/API/Program.td             |  44 +
 offload/liboffload/API/Queue.td               |  52 ++
 .../liboffload/include/generated/OffloadAPI.h | 656 +++++++++++++++
 .../include/generated/OffloadEntryPoints.inc  | 775 ++++++++++++++++++
 .../include/generated/OffloadFuncs.inc        |  36 +
 .../generated/OffloadImplFuncDecls.inc        |  54 ++
 .../include/generated/OffloadPrint.hpp        | 210 +++++
 offload/liboffload/src/OffloadImpl.cpp        | 360 ++++++++
 .../common/include/GlobalHandler.h            |   5 +-
 offload/plugins-nextgen/cuda/src/rtl.cpp      |  28 +
 offload/plugins-nextgen/host/src/rtl.cpp      |   4 +-
 offload/unittests/OffloadAPI/CMakeLists.txt   |   4 +-
 .../OffloadAPI/queue/olCreateQueue.cpp        |  19 +
 18 files changed, 2420 insertions(+), 5 deletions(-)
 create mode 100644 offload/liboffload/API/Enqueue.td
 create mode 100644 offload/liboffload/API/Event.td
 create mode 100644 offload/liboffload/API/Kernel.td
 create mode 100644 offload/liboffload/API/Program.td
 create mode 100644 offload/liboffload/API/Queue.td
 create mode 100644 offload/unittests/OffloadAPI/queue/olCreateQueue.cpp

diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td
index 5b19d1d47129ef..7fedb2002f157e 100644
--- a/offload/liboffload/API/Common.td
+++ b/offload/liboffload/API/Common.td
@@ -62,6 +62,26 @@ def : Handle {
   let desc = "Handle of context object";
 }
 
+def : Handle {
+  let name = "ol_queue_handle_t";
+  let desc = "Handle of queue object";
+}
+
+def : Handle {
+  let name = "ol_event_handle_t";
+  let desc = "Handle of event object";
+}
+
+def : Handle {
+  let name = "ol_program_handle_t";
+  let desc = "Handle of program object";
+}
+
+def : Handle {
+  let name = "ol_kernel_handle_t";
+  let desc = "Handle of kernel object";
+}
+
 def : Enum {
   let name = "ol_errc_t";
   let desc = "Defines Return/Error codes";
diff --git a/offload/liboffload/API/Enqueue.td b/offload/liboffload/API/Enqueue.td
new file mode 100644
index 00000000000000..621eb3a2f410ef
--- /dev/null
+++ b/offload/liboffload/API/Enqueue.td
@@ -0,0 +1,68 @@
+//===-- Enqueue.td - Enqueue definitions for Offload -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to enqueueable operations
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+    let name = "olEnqueueDataWrite";
+    let desc = "Enqueue a write operation from host to device memory";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+        Param<"void*", "SrcPtr", "host pointer to copy from", PARAM_IN>,
+        Param<"void*", "DstPtr", "device pointer to copy to", PARAM_IN>,
+        Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
+        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olEnqueueDataRead";
+    let desc = "Enqueue a read operation from device to host memory";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+        Param<"void*", "SrcPtr", "device pointer to copy from", PARAM_IN>,
+        Param<"void*", "DstPtr", "host pointer to copy to", PARAM_IN>,
+        Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
+        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olEnqueueDataCopy";
+    let desc = "Enqueue a write operation between device allocations";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+        Param<"void*", "SrcPtr", "device pointer to copy from", PARAM_IN>,
+        Param<"void*", "DstPtr", "device pointer to copy to", PARAM_IN>,
+        Param<"ol_device_handle_t", "DstDevice", "device that the destination pointer is resident on", PARAM_IN>,
+        Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
+        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+    ];
+    let returns = [];
+}
+
+
+def : Function {
+    let name = "olEnqueueKernelLaunch";
+    let desc = "Enqueue a kernel launch with the specified size and parameters";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+        Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
+        Param<"const size_t*", "GlobalWorkSize", "an array of size 3 representing the global work size", PARAM_IN>,
+        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
+    ];
+    let returns = [];
+}
diff --git a/offload/liboffload/API/Event.td b/offload/liboffload/API/Event.td
new file mode 100644
index 00000000000000..db90a7c8e2be43
--- /dev/null
+++ b/offload/liboffload/API/Event.td
@@ -0,0 +1,41 @@
+//===-- Event.td - Event definitions for Offload -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to the event handle
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+    let name = "olRetainEvent";
+    let desc = "Increment the reference count of the given event";
+    let details = [];
+    let params = [
+        Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olReleaseEvent";
+    let desc = "Decrement the reference count of the given event";
+    let details = [];
+    let params = [
+        Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olWaitEvent";
+    let desc = "Wait for the event to be complete";
+    let details = [];
+    let params = [
+        Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
+    ];
+    let returns = [];
+}
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
new file mode 100644
index 00000000000000..936372c18ca370
--- /dev/null
+++ b/offload/liboffload/API/Kernel.td
@@ -0,0 +1,44 @@
+def : Function {
+    let name = "olCreateKernel";
+    let desc = "";
+    let details = [];
+    let params = [
+        Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>,
+        Param<"const char*", "KernelName", "name of the kernel entry point in the program", PARAM_IN>,
+        Param<"ol_kernel_handle_t*", "Kernel", "output pointer for the created kernel", PARAM_OUT>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olRetainKernel";
+    let desc = "Create a queue for the given device";
+    let details = [];
+    let params = [
+        Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olReleaseKernel";
+    let desc = "Create a queue for the given device";
+    let details = [];
+    let params = [
+        Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olSetKernelArgValue";
+    let desc = "Create a queue for the given device";
+    let details = [];
+    let params = [
+        Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
+        Param<"uint32_t", "Index", "index of the argument", PARAM_IN>,
+        Param<"size_t", "Size", "size of the argument data", PARAM_IN>,
+        Param<"void*", "ArgData", "pointer to the argument data", PARAM_IN>
+    ];
+    let returns = [];
+}
diff --git a/offload/liboffload/API/OffloadAPI.td b/offload/liboffload/API/OffloadAPI.td
index a609cc7ac80b41..f2822b93e6bf8f 100644
--- a/offload/liboffload/API/OffloadAPI.td
+++ b/offload/liboffload/API/OffloadAPI.td
@@ -14,3 +14,8 @@ include "Common.td"
 include "Platform.td"
 include "Device.td"
 include "Memory.td"
+include "Queue.td"
+include "Event.td"
+include "Enqueue.td"
+include "Program.td"
+include "Kernel.td"
diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td
new file mode 100644
index 00000000000000..684a6581320f8d
--- /dev/null
+++ b/offload/liboffload/API/Program.td
@@ -0,0 +1,44 @@
+//===-- Program.td - Program definitions for Offload -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to the program handle
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+    let name = "olCreateProgram";
+    let desc = "";
+    let details = [];
+    let params = [
+        Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
+        Param<"void*", "ProgData", "pointer to the program binary data", PARAM_IN>,
+        Param<"size_t", "ProgDataSize", "size of the program binary in bytes", PARAM_IN>,
+        Param<"ol_program_handle_t*", "Queue", "output pointer for the created program", PARAM_OUT>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olRetainProgram";
+    let desc = "Create a queue for the given device";
+    let details = [];
+    let params = [
+        Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olReleaseProgram";
+    let desc = "Create a queue for the given device";
+    let details = [];
+    let params = [
+        Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>
+    ];
+    let returns = [];
+}
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
new file mode 100644
index 00000000000000..5629fa40d56d5f
--- /dev/null
+++ b/offload/liboffload/API/Queue.td
@@ -0,0 +1,52 @@
+//===-- Queue.td - Queue definitions for Offload -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains Offload API definitions related to the queue handle
+//
+//===----------------------------------------------------------------------===//
+
+def : Function {
+    let name = "olCreateQueue";
+    let desc = "Create a queue for the given device";
+    let details = [];
+    let params = [
+        Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
+        Param<"ol_queue_handle_t*", "Queue", "output pointer for the created queue", PARAM_OUT>
+    ];
+    let returns = [];
+}
+
+def : Function { // olRetainQueue: takes one queue handle, no outputs
+    let name = "olRetainQueue";
+    let desc = "Increment the reference count of the given queue";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function { // olReleaseQueue: takes one queue handle, no outputs
+    let name = "olReleaseQueue";
+    let desc = "Decrement the reference count of the given queue";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+    ];
+    let returns = [];
+}
+
+def : Function {
+    let name = "olFinishQueue";
+    let desc = "Wait for the enqueued work on a queue to complete";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>
+    ];
+    let returns = [];
+}
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index 4c3356645e55aa..2384de19ae72e5 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -85,6 +85,22 @@ typedef struct ol_device_handle_t_ *ol_device_handle_t;
 /// @brief Handle of context object
 typedef struct ol_context_handle_t_ *ol_context_handle_t;
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of queue object
+typedef struct ol_queue_handle_t_ *ol_queue_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of event object
+typedef struct ol_event_handle_t_ *ol_event_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of program object
+typedef struct ol_program_handle_t_ *ol_program_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Handle of kernel object
+typedef struct ol_kernel_handle_t_ *ol_kernel_handle_t;
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Defines Return/Error codes
 typedef enum ol_errc_t {
@@ -523,6 +539,359 @@ OL_APIEXPORT ol_result_t OL_APICALL olMemFree(
     // [in] address of the allocation to free
     void *Address);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Create a queue for the given device
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Device`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == Queue`
+OL_APIEXPORT ol_result_t OL_APICALL olCreateQueue(
+    // [in] handle of the device
+    ol_device_handle_t Device,
+    // [out] output pointer for the created queue
+    ol_queue_handle_t *Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the reference count of the given queue
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olRetainQueue(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the reference count of the given queue
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueue(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Wait for the enqueued work on a queue to complete
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olFinishQueue(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the reference count of the given event
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Event`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(
+    // [in] handle of the event
+    ol_event_handle_t Event);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the reference count of the given event
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Event`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseEvent(
+    // [in] handle of the event
+    ol_event_handle_t Event);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Wait for the event to be complete
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Event`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(
+    // [in] handle of the event
+    ol_event_handle_t Event);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a write operation from host to device memory
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == SrcPtr`
+///         + `NULL == DstPtr`
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataWrite(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue,
+    // [in] host pointer to copy from
+    void *SrcPtr,
+    // [in] device pointer to copy to
+    void *DstPtr,
+    // [in] size in bytes of data to copy
+    size_t Size,
+    // [out][optional] optional recorded event for the enqueued operation
+    ol_event_handle_t *EventOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a read operation from device to host memory
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == SrcPtr`
+///         + `NULL == DstPtr`
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataRead(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue,
+    // [in] device pointer to copy from
+    void *SrcPtr,
+    // [in] host pointer to copy to
+    void *DstPtr,
+    // [in] size in bytes of data to copy
+    size_t Size,
+    // [out][optional] optional recorded event for the enqueued operation
+    ol_event_handle_t *EventOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a write operation between device allocations
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///         + `NULL == DstDevice`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == SrcPtr`
+///         + `NULL == DstPtr`
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopy(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue,
+    // [in] device pointer to copy from
+    void *SrcPtr,
+    // [in] device pointer to copy to
+    void *DstPtr,
+    // [in] device that the destination pointer is resident on
+    ol_device_handle_t DstDevice,
+    // [in] size in bytes of data to copy
+    size_t Size,
+    // [out][optional] optional recorded event for the enqueued operation
+    ol_event_handle_t *EventOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Enqueue a kernel launch with the specified size and parameters
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Queue`
+///         + `NULL == Kernel`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == GlobalWorkSize`
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
+    // [in] handle of the queue
+    ol_queue_handle_t Queue,
+    // [in] handle of the kernel
+    ol_kernel_handle_t Kernel,
+    // [in] an array of size 3 representing the global work size
+    const size_t *GlobalWorkSize,
+    // [out][optional] optional recorded event for the enqueued operation
+    ol_event_handle_t *EventOut);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Create a program for the device from the given binary data
+///
+/// @details The output parameter is named Queue but receives the program handle
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Device`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == ProgData`
+///         + `NULL == Queue`
+OL_APIEXPORT ol_result_t OL_APICALL olCreateProgram(
+    // [in] handle of the device
+    ol_device_handle_t Device,
+    // [in] pointer to the program binary data
+    void *ProgData,
+    // [in] size of the program binary in bytes
+    size_t ProgDataSize,
+    // [out] output pointer for the created program
+    ol_program_handle_t *Queue);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the reference count of the given program
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Program`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olRetainProgram(
+    // [in] handle of the program
+    ol_program_handle_t Program);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the reference count of the given program
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Program`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseProgram(
+    // [in] handle of the program
+    ol_program_handle_t Program);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Create a kernel from the named entry point in the given program
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Program`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == KernelName`
+///         + `NULL == Kernel`
+OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(
+    // [in] handle of the program
+    ol_program_handle_t Program,
+    // [in] name of the kernel entry point in the program
+    const char *KernelName,
+    // [out] output pointer for the created kernel
+    ol_kernel_handle_t *Kernel);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Increment the reference count of the given kernel
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Kernel`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(
+    // [in] handle of the kernel
+    ol_kernel_handle_t Kernel);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decrement the reference count of the given kernel
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Kernel`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernel(
+    // [in] handle of the kernel
+    ol_kernel_handle_t Kernel);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Set the value of the kernel argument at the given index
+///
+/// @details
+///
+/// @returns
+///     - ::OL_RESULT_SUCCESS
+///     - ::OL_ERRC_UNINITIALIZED
+///     - ::OL_ERRC_DEVICE_LOST
+///     - ::OL_ERRC_INVALID_NULL_HANDLE
+///         + `NULL == Kernel`
+///     - ::OL_ERRC_INVALID_NULL_POINTER
+///         + `NULL == ArgData`
+OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgValue(
+    // [in] handle of the kernel
+    ol_kernel_handle_t Kernel,
+    // [in] index of the argument
+    uint32_t Index,
+    // [in] size of the argument data
+    size_t Size,
+    // [in] pointer to the argument data
+    void *ArgData);
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for olGetPlatform
 /// @details Each entry is a pointer to the parameter passed to the function;
@@ -613,6 +982,157 @@ typedef struct ol_mem_free_params_t {
   void **pAddress;
 } ol_mem_free_params_t;
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olCreateQueue
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_create_queue_params_t {
+  ol_device_handle_t *pDevice;
+  ol_queue_handle_t **pQueue;
+} ol_create_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainQueue
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_retain_queue_params_t {
+  ol_queue_handle_t *pQueue;
+} ol_retain_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseQueue
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_release_queue_params_t {
+  ol_queue_handle_t *pQueue;
+} ol_release_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olFinishQueue
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_finish_queue_params_t {
+  ol_queue_handle_t *pQueue;
+} ol_finish_queue_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainEvent
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_retain_event_params_t {
+  ol_event_handle_t *pEvent;
+} ol_retain_event_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseEvent
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_release_event_params_t {
+  ol_event_handle_t *pEvent;
+} ol_release_event_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olWaitEvent
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_wait_event_params_t {
+  ol_event_handle_t *pEvent;
+} ol_wait_event_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olEnqueueDataWrite
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_enqueue_data_write_params_t {
+  ol_queue_handle_t *pQueue;
+  void **pSrcPtr;
+  void **pDstPtr;
+  size_t *pSize;
+  ol_event_handle_t **pEventOut;
+} ol_enqueue_data_write_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olEnqueueDataRead
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_enqueue_data_read_params_t {
+  ol_queue_handle_t *pQueue;
+  void **pSrcPtr;
+  void **pDstPtr;
+  size_t *pSize;
+  ol_event_handle_t **pEventOut;
+} ol_enqueue_data_read_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olEnqueueDataCopy
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_enqueue_data_copy_params_t {
+  ol_queue_handle_t *pQueue;
+  void **pSrcPtr;
+  void **pDstPtr;
+  ol_device_handle_t *pDstDevice;
+  size_t *pSize;
+  ol_event_handle_t **pEventOut;
+} ol_enqueue_data_copy_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olEnqueueKernelLaunch
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_enqueue_kernel_launch_params_t {
+  ol_queue_handle_t *pQueue;
+  ol_kernel_handle_t *pKernel;
+  const size_t **pGlobalWorkSize;
+  ol_event_handle_t **pEventOut;
+} ol_enqueue_kernel_launch_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olCreateProgram
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_create_program_params_t {
+  ol_device_handle_t *pDevice;
+  void **pProgData;
+  size_t *pProgDataSize;
+  ol_program_handle_t **pQueue;
+} ol_create_program_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainProgram
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_retain_program_params_t {
+  ol_program_handle_t *pProgram;
+} ol_retain_program_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseProgram
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_release_program_params_t {
+  ol_program_handle_t *pProgram;
+} ol_release_program_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olCreateKernel
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_create_kernel_params_t {
+  ol_program_handle_t *pProgram;
+  const char **pKernelName;
+  ol_kernel_handle_t **pKernel;
+} ol_create_kernel_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olRetainKernel
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_retain_kernel_params_t {
+  ol_kernel_handle_t *pKernel;
+} ol_retain_kernel_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olReleaseKernel
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_release_kernel_params_t {
+  ol_kernel_handle_t *pKernel;
+} ol_release_kernel_params_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function parameters for olSetKernelArgValue
+/// @details Each entry is a pointer to the parameter passed to the function;
+typedef struct ol_set_kernel_arg_value_params_t {
+  ol_kernel_handle_t *pKernel;
+  uint32_t *pIndex;
+  size_t *pSize;
+  void **pArgData;
+} ol_set_kernel_arg_value_params_t;
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Variant of olInit that also sets source code location information
 /// @details See also ::olInit
@@ -702,6 +1222,142 @@ OL_APIEXPORT ol_result_t OL_APICALL
 olMemFreeWithCodeLoc(ol_device_handle_t Device, ol_alloc_type_t Type,
                      void *Address, ol_code_location_t *CodeLocation);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olCreateQueue that also sets source code location
+/// information
+/// @details See also ::olCreateQueue
+OL_APIEXPORT ol_result_t OL_APICALL
+olCreateQueueWithCodeLoc(ol_device_handle_t Device, ol_queue_handle_t *Queue,
+                         ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainQueue that also sets source code location
+/// information
+/// @details See also ::olRetainQueue
+OL_APIEXPORT ol_result_t OL_APICALL olRetainQueueWithCodeLoc(
+    ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseQueue that also sets source code location
+/// information
+/// @details See also ::olReleaseQueue
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueueWithCodeLoc(
+    ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olFinishQueue that also sets source code location
+/// information
+/// @details See also ::olFinishQueue
+OL_APIEXPORT ol_result_t OL_APICALL olFinishQueueWithCodeLoc(
+    ol_queue_handle_t Queue, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainEvent that also sets source code location
+/// information
+/// @details See also ::olRetainEvent
+OL_APIEXPORT ol_result_t OL_APICALL olRetainEventWithCodeLoc(
+    ol_event_handle_t Event, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseEvent that also sets source code location
+/// information
+/// @details See also ::olReleaseEvent
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseEventWithCodeLoc(
+    ol_event_handle_t Event, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olWaitEvent that also sets source code location
+/// information
+/// @details See also ::olWaitEvent
+OL_APIEXPORT ol_result_t OL_APICALL olWaitEventWithCodeLoc(
+    ol_event_handle_t Event, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olEnqueueDataWrite that also sets source code location
+/// information
+/// @details See also ::olEnqueueDataWrite
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataWriteWithCodeLoc(
+    ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr, size_t Size,
+    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olEnqueueDataRead that also sets source code location
+/// information
+/// @details See also ::olEnqueueDataRead
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataReadWithCodeLoc(
+    ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr, size_t Size,
+    ol_event_handle_t *EventOut, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olEnqueueDataCopy that also sets source code location
+/// information
+/// @details See also ::olEnqueueDataCopy
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopyWithCodeLoc(
+    ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
+    ol_device_handle_t DstDevice, size_t Size, ol_event_handle_t *EventOut,
+    ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olEnqueueKernelLaunch that also sets source code location
+/// information
+/// @details See also ::olEnqueueKernelLaunch
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunchWithCodeLoc(
+    ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+    const size_t *GlobalWorkSize, ol_event_handle_t *EventOut,
+    ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olCreateProgram that also sets source code location
+/// information
+/// @details See also ::olCreateProgram
+OL_APIEXPORT ol_result_t OL_APICALL olCreateProgramWithCodeLoc(
+    ol_device_handle_t Device, void *ProgData, size_t ProgDataSize,
+    ol_program_handle_t *Queue, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainProgram that also sets source code location
+/// information
+/// @details See also ::olRetainProgram
+OL_APIEXPORT ol_result_t OL_APICALL olRetainProgramWithCodeLoc(
+    ol_program_handle_t Program, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseProgram that also sets source code location
+/// information
+/// @details See also ::olReleaseProgram
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseProgramWithCodeLoc(
+    ol_program_handle_t Program, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olCreateKernel that also sets source code location
+/// information
+/// @details See also ::olCreateKernel
+OL_APIEXPORT ol_result_t OL_APICALL olCreateKernelWithCodeLoc(
+    ol_program_handle_t Program, const char *KernelName,
+    ol_kernel_handle_t *Kernel, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olRetainKernel that also sets source code location
+/// information
+/// @details See also ::olRetainKernel
+OL_APIEXPORT ol_result_t OL_APICALL olRetainKernelWithCodeLoc(
+    ol_kernel_handle_t Kernel, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olReleaseKernel that also sets source code location
+/// information
+/// @details See also ::olReleaseKernel
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernelWithCodeLoc(
+    ol_kernel_handle_t Kernel, ol_code_location_t *CodeLocation);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Variant of olSetKernelArgValue that also sets source code location
+/// information
+/// @details See also ::olSetKernelArgValue
+OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgValueWithCodeLoc(
+    ol_kernel_handle_t Kernel, uint32_t Index, size_t Size, void *ArgData,
+    ol_code_location_t *CodeLocation);
+
 #if defined(__cplusplus)
 } // extern "C"
 #endif
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index bcde65452b265f..0ae3c36f95827f 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -536,3 +536,778 @@ ol_result_t olMemFreeWithCodeLoc(ol_device_handle_t Device,
   currentCodeLocation() = nullptr;
   return Result;
 }
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Device handle or a null Queue pointer; otherwise forwards
+// to olCreateQueue_impl. Parameter validation is unconditional here.
+ol_impl_result_t olCreateQueue_val(ol_device_handle_t Device,
+                                   ol_queue_handle_t *Queue) {
+  if (Device == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  if (Queue == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  return olCreateQueue_impl(Device, Queue);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olCreateQueue(ol_device_handle_t Device,
+                                                  ol_queue_handle_t *Queue) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olCreateQueue";
+  }
+
+  ol_result_t Status = olCreateQueue_val(Device, Queue);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_create_queue_params_t Params = {&Device, &Queue};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olCreateQueueWithCodeLoc(ol_device_handle_t Device,
+                                     ol_queue_handle_t *Queue,
+                                     ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olCreateQueue(Device, Queue);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Queue handle; otherwise forwards to olRetainQueue_impl.
+// Parameter validation is unconditional here.
+ol_impl_result_t olRetainQueue_val(ol_queue_handle_t Queue) {
+  if (Queue == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  return olRetainQueue_impl(Queue);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olRetainQueue(ol_queue_handle_t Queue) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olRetainQueue";
+  }
+
+  ol_result_t Status = olRetainQueue_val(Queue);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_retain_queue_params_t Params = {&Queue};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olRetainQueueWithCodeLoc(ol_queue_handle_t Queue,
+                                     ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olRetainQueue(Queue);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Queue handle; otherwise forwards to olReleaseQueue_impl.
+// Parameter validation is unconditional here.
+ol_impl_result_t olReleaseQueue_val(ol_queue_handle_t Queue) {
+  if (Queue == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  return olReleaseQueue_impl(Queue);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseQueue(ol_queue_handle_t Queue) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olReleaseQueue";
+  }
+
+  ol_result_t Status = olReleaseQueue_val(Queue);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_release_queue_params_t Params = {&Queue};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olReleaseQueueWithCodeLoc(ol_queue_handle_t Queue,
+                                      ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olReleaseQueue(Queue);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Queue handle; otherwise forwards to olFinishQueue_impl.
+// Parameter validation is unconditional here.
+ol_impl_result_t olFinishQueue_val(ol_queue_handle_t Queue) {
+  if (Queue == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  return olFinishQueue_impl(Queue);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olFinishQueue(ol_queue_handle_t Queue) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olFinishQueue";
+  }
+
+  ol_result_t Status = olFinishQueue_val(Queue);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_finish_queue_params_t Params = {&Queue};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olFinishQueueWithCodeLoc(ol_queue_handle_t Queue,
+                                     ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olFinishQueue(Queue);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Event handle; otherwise forwards to olRetainEvent_impl.
+// Parameter validation is unconditional here.
+ol_impl_result_t olRetainEvent_val(ol_event_handle_t Event) {
+  if (Event == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  return olRetainEvent_impl(Event);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olRetainEvent(ol_event_handle_t Event) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olRetainEvent";
+  }
+
+  ol_result_t Status = olRetainEvent_val(Event);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_retain_event_params_t Params = {&Event};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olRetainEventWithCodeLoc(ol_event_handle_t Event,
+                                     ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olRetainEvent(Event);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Event handle; otherwise forwards to olReleaseEvent_impl.
+// Parameter validation is unconditional here.
+ol_impl_result_t olReleaseEvent_val(ol_event_handle_t Event) {
+  if (Event == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  return olReleaseEvent_impl(Event);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseEvent(ol_event_handle_t Event) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olReleaseEvent";
+  }
+
+  ol_result_t Status = olReleaseEvent_val(Event);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_release_event_params_t Params = {&Event};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olReleaseEventWithCodeLoc(ol_event_handle_t Event,
+                                      ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olReleaseEvent(Event);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Event handle; otherwise forwards to olWaitEvent_impl.
+// Parameter validation is unconditional here.
+ol_impl_result_t olWaitEvent_val(ol_event_handle_t Event) {
+  if (Event == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  return olWaitEvent_impl(Event);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olWaitEvent(ol_event_handle_t Event) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olWaitEvent";
+  }
+
+  ol_result_t Status = olWaitEvent_val(Event);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_wait_event_params_t Params = {&Event};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olWaitEventWithCodeLoc(ol_event_handle_t Event,
+                                   ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olWaitEvent(Event);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Queue, SrcPtr or DstPtr (Size and EventOut are not checked),
+// then forwards to olEnqueueDataWrite_impl.
+ol_impl_result_t olEnqueueDataWrite_val(ol_queue_handle_t Queue, void *SrcPtr,
+                                        void *DstPtr, size_t Size,
+                                        ol_event_handle_t *EventOut) {
+  if (Queue == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  if (SrcPtr == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  if (DstPtr == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  return olEnqueueDataWrite_impl(Queue, SrcPtr, DstPtr, Size, EventOut);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL
+olEnqueueDataWrite(ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
+                   size_t Size, ol_event_handle_t *EventOut) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olEnqueueDataWrite";
+  }
+
+  ol_result_t Status =
+      olEnqueueDataWrite_val(Queue, SrcPtr, DstPtr, Size, EventOut);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_enqueue_data_write_params_t Params = {&Queue, &SrcPtr, &DstPtr, &Size,
+                                             &EventOut};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olEnqueueDataWriteWithCodeLoc(ol_queue_handle_t Queue, void *SrcPtr,
+                                          void *DstPtr, size_t Size,
+                                          ol_event_handle_t *EventOut,
+                                          ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status =
+      olEnqueueDataWrite(Queue, SrcPtr, DstPtr, Size, EventOut);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Queue, SrcPtr or DstPtr (Size and EventOut are not checked),
+// then forwards to olEnqueueDataRead_impl.
+ol_impl_result_t olEnqueueDataRead_val(ol_queue_handle_t Queue, void *SrcPtr,
+                                       void *DstPtr, size_t Size,
+                                       ol_event_handle_t *EventOut) {
+  if (Queue == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  if (SrcPtr == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  if (DstPtr == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  return olEnqueueDataRead_impl(Queue, SrcPtr, DstPtr, Size, EventOut);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL
+olEnqueueDataRead(ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
+                  size_t Size, ol_event_handle_t *EventOut) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olEnqueueDataRead";
+  }
+
+  ol_result_t Status =
+      olEnqueueDataRead_val(Queue, SrcPtr, DstPtr, Size, EventOut);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_enqueue_data_read_params_t Params = {&Queue, &SrcPtr, &DstPtr, &Size,
+                                            &EventOut};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olEnqueueDataReadWithCodeLoc(ol_queue_handle_t Queue, void *SrcPtr,
+                                         void *DstPtr, size_t Size,
+                                         ol_event_handle_t *EventOut,
+                                         ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olEnqueueDataRead(Queue, SrcPtr, DstPtr, Size, EventOut);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Queue, DstDevice, SrcPtr or DstPtr (checked in that order),
+// then forwards to olEnqueueDataCopy_impl. Size and EventOut are not checked.
+ol_impl_result_t olEnqueueDataCopy_val(ol_queue_handle_t Queue, void *SrcPtr,
+                                       void *DstPtr,
+                                       ol_device_handle_t DstDevice,
+                                       size_t Size,
+                                       ol_event_handle_t *EventOut) {
+  if (Queue == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  if (DstDevice == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  if (SrcPtr == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  if (DstPtr == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  return olEnqueueDataCopy_impl(Queue, SrcPtr, DstPtr, DstDevice, Size,
+                                EventOut);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueDataCopy(
+    ol_queue_handle_t Queue, void *SrcPtr, void *DstPtr,
+    ol_device_handle_t DstDevice, size_t Size, ol_event_handle_t *EventOut) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olEnqueueDataCopy";
+  }
+
+  ol_result_t Status =
+      olEnqueueDataCopy_val(Queue, SrcPtr, DstPtr, DstDevice, Size, EventOut);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_enqueue_data_copy_params_t Params = {&Queue,     &SrcPtr, &DstPtr,
+                                            &DstDevice, &Size,   &EventOut};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olEnqueueDataCopyWithCodeLoc(ol_queue_handle_t Queue, void *SrcPtr,
+                                         void *DstPtr,
+                                         ol_device_handle_t DstDevice,
+                                         size_t Size,
+                                         ol_event_handle_t *EventOut,
+                                         ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status =
+      olEnqueueDataCopy(Queue, SrcPtr, DstPtr, DstDevice, Size, EventOut);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Queue, Kernel or GlobalWorkSize (EventOut is not checked),
+// then forwards to olEnqueueKernelLaunch_impl.
+ol_impl_result_t olEnqueueKernelLaunch_val(ol_queue_handle_t Queue,
+                                           ol_kernel_handle_t Kernel,
+                                           const size_t *GlobalWorkSize,
+                                           ol_event_handle_t *EventOut) {
+  if (Queue == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  if (Kernel == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  if (GlobalWorkSize == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  return olEnqueueKernelLaunch_impl(Queue, Kernel, GlobalWorkSize, EventOut);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olEnqueueKernelLaunch(
+    ol_queue_handle_t Queue, ol_kernel_handle_t Kernel,
+    const size_t *GlobalWorkSize, ol_event_handle_t *EventOut) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olEnqueueKernelLaunch";
+  }
+
+  ol_result_t Status =
+      olEnqueueKernelLaunch_val(Queue, Kernel, GlobalWorkSize, EventOut);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_enqueue_kernel_launch_params_t Params = {&Queue, &Kernel,
+                                                &GlobalWorkSize, &EventOut};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olEnqueueKernelLaunchWithCodeLoc(ol_queue_handle_t Queue,
+                                             ol_kernel_handle_t Kernel,
+                                             const size_t *GlobalWorkSize,
+                                             ol_event_handle_t *EventOut,
+                                             ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status =
+      olEnqueueKernelLaunch(Queue, Kernel, GlobalWorkSize, EventOut);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects null Device/ProgData/Queue, then forwards to olCreateProgram_impl.
+// NOTE(review): "Queue" receives a program handle; name looks copy-pasted.
+ol_impl_result_t olCreateProgram_val(ol_device_handle_t Device, void *ProgData,
+                                     size_t ProgDataSize,
+                                     ol_program_handle_t *Queue) {
+  if (Device == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  if (ProgData == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  if (Queue == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  return olCreateProgram_impl(Device, ProgData, ProgDataSize, Queue);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL
+olCreateProgram(ol_device_handle_t Device, void *ProgData, size_t ProgDataSize,
+                ol_program_handle_t *Queue) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olCreateProgram";
+  }
+
+  ol_result_t Status =
+      olCreateProgram_val(Device, ProgData, ProgDataSize, Queue);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_create_program_params_t Params = {&Device, &ProgData, &ProgDataSize,
+                                         &Queue};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olCreateProgramWithCodeLoc(ol_device_handle_t Device,
+                                       void *ProgData, size_t ProgDataSize,
+                                       ol_program_handle_t *Queue,
+                                       ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olCreateProgram(Device, ProgData, ProgDataSize, Queue);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Program handle; otherwise forwards to olRetainProgram_impl.
+// Parameter validation is unconditional here.
+ol_impl_result_t olRetainProgram_val(ol_program_handle_t Program) {
+  if (Program == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  return olRetainProgram_impl(Program);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL
+olRetainProgram(ol_program_handle_t Program) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olRetainProgram";
+  }
+
+  ol_result_t Status = olRetainProgram_val(Program);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_retain_program_params_t Params = {&Program};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olRetainProgramWithCodeLoc(ol_program_handle_t Program,
+                                       ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olRetainProgram(Program);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Program handle; otherwise forwards to olReleaseProgram_impl.
+// Parameter validation is unconditional here.
+ol_impl_result_t olReleaseProgram_val(ol_program_handle_t Program) {
+  if (Program == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  return olReleaseProgram_impl(Program);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL
+olReleaseProgram(ol_program_handle_t Program) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olReleaseProgram";
+  }
+
+  ol_result_t Status = olReleaseProgram_val(Program);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_release_program_params_t Params = {&Program};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olReleaseProgramWithCodeLoc(ol_program_handle_t Program,
+                                        ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olReleaseProgram(Program);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Program handle, KernelName or Kernel pointer, then forwards
+// to olCreateKernel_impl. Parameter validation is unconditional here.
+ol_impl_result_t olCreateKernel_val(ol_program_handle_t Program,
+                                    const char *KernelName,
+                                    ol_kernel_handle_t *Kernel) {
+  if (Program == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  if (KernelName == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  if (Kernel == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  return olCreateKernel_impl(Program, KernelName, Kernel);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olCreateKernel(ol_program_handle_t Program,
+                                                   const char *KernelName,
+                                                   ol_kernel_handle_t *Kernel) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olCreateKernel";
+  }
+
+  ol_result_t Status = olCreateKernel_val(Program, KernelName, Kernel);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_create_kernel_params_t Params = {&Program, &KernelName, &Kernel};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olCreateKernelWithCodeLoc(ol_program_handle_t Program,
+                                      const char *KernelName,
+                                      ol_kernel_handle_t *Kernel,
+                                      ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olCreateKernel(Program, KernelName, Kernel);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Kernel handle; otherwise forwards to olRetainKernel_impl.
+// Parameter validation is unconditional here.
+ol_impl_result_t olRetainKernel_val(ol_kernel_handle_t Kernel) {
+  if (Kernel == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  return olRetainKernel_impl(Kernel);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olRetainKernel(ol_kernel_handle_t Kernel) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olRetainKernel";
+  }
+
+  ol_result_t Status = olRetainKernel_val(Kernel);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_retain_kernel_params_t Params = {&Kernel};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olRetainKernelWithCodeLoc(ol_kernel_handle_t Kernel,
+                                      ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olRetainKernel(Kernel);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Kernel handle; otherwise forwards to olReleaseKernel_impl.
+// Parameter validation is unconditional here.
+ol_impl_result_t olReleaseKernel_val(ol_kernel_handle_t Kernel) {
+  if (Kernel == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  return olReleaseKernel_impl(Kernel);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olReleaseKernel(ol_kernel_handle_t Kernel) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olReleaseKernel";
+  }
+
+  ol_result_t Status = olReleaseKernel_val(Kernel);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_release_kernel_params_t Params = {&Kernel};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olReleaseKernelWithCodeLoc(ol_kernel_handle_t Kernel,
+                                       ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olReleaseKernel(Kernel);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Rejects a null Kernel or ArgData (Index and Size are not checked), then
+// forwards to olSetKernelArgValue_impl.
+ol_impl_result_t olSetKernelArgValue_val(ol_kernel_handle_t Kernel,
+                                         uint32_t Index, size_t Size,
+                                         void *ArgData) {
+  if (Kernel == nullptr) {
+    return OL_ERRC_INVALID_NULL_HANDLE;
+  }
+
+  if (ArgData == nullptr) {
+    return OL_ERRC_INVALID_NULL_POINTER;
+  }
+
+  return olSetKernelArgValue_impl(Kernel, Index, Size, ArgData);
+}
+// Public entry point: validate-and-dispatch, with optional call tracing.
+OL_APIEXPORT ol_result_t OL_APICALL olSetKernelArgValue(
+    ol_kernel_handle_t Kernel, uint32_t Index, size_t Size, void *ArgData) {
+  if (offloadConfig().TracingEnabled) {
+    std::cout << "---> olSetKernelArgValue";
+  }
+
+  ol_result_t Status = olSetKernelArgValue_val(Kernel, Index, Size, ArgData);
+
+  if (offloadConfig().TracingEnabled) {
+    ol_set_kernel_arg_value_params_t Params = {&Kernel, &Index, &Size,
+                                               &ArgData};
+    std::cout << "(" << &Params << ")" << "-> " << Status << "\n";
+    if (Status && Status->Details) {
+      std::cout << "     *Error Details* " << Status->Details << " \n";
+    }
+  }
+  return Status;
+}
+ol_result_t olSetKernelArgValueWithCodeLoc(ol_kernel_handle_t Kernel,
+                                           uint32_t Index, size_t Size,
+                                           void *ArgData,
+                                           ol_code_location_t *CodeLocation) {
+  // Expose the caller's code location only while the wrapped call runs.
+  currentCodeLocation() = CodeLocation;
+  ol_result_t Status = olSetKernelArgValue(Kernel, Index, Size, ArgData);
+  currentCodeLocation() = nullptr;
+  return Status;
+}
diff --git a/offload/liboffload/include/generated/OffloadFuncs.inc b/offload/liboffload/include/generated/OffloadFuncs.inc
index 26120f18279dcc..6f2bb34599a1db 100644
--- a/offload/liboffload/include/generated/OffloadFuncs.inc
+++ b/offload/liboffload/include/generated/OffloadFuncs.inc
@@ -22,6 +22,24 @@ OFFLOAD_FUNC(olGetDeviceInfo)
 OFFLOAD_FUNC(olGetDeviceInfoSize)
 OFFLOAD_FUNC(olMemAlloc)
 OFFLOAD_FUNC(olMemFree)
+OFFLOAD_FUNC(olCreateQueue)
+OFFLOAD_FUNC(olRetainQueue)
+OFFLOAD_FUNC(olReleaseQueue)
+OFFLOAD_FUNC(olFinishQueue)
+OFFLOAD_FUNC(olRetainEvent)
+OFFLOAD_FUNC(olReleaseEvent)
+OFFLOAD_FUNC(olWaitEvent)
+OFFLOAD_FUNC(olEnqueueDataWrite)
+OFFLOAD_FUNC(olEnqueueDataRead)
+OFFLOAD_FUNC(olEnqueueDataCopy)
+OFFLOAD_FUNC(olEnqueueKernelLaunch)
+OFFLOAD_FUNC(olCreateProgram)
+OFFLOAD_FUNC(olRetainProgram)
+OFFLOAD_FUNC(olReleaseProgram)
+OFFLOAD_FUNC(olCreateKernel)
+OFFLOAD_FUNC(olRetainKernel)
+OFFLOAD_FUNC(olReleaseKernel)
+OFFLOAD_FUNC(olSetKernelArgValue)
 OFFLOAD_FUNC(olInitWithCodeLoc)
 OFFLOAD_FUNC(olShutDownWithCodeLoc)
 OFFLOAD_FUNC(olGetPlatformWithCodeLoc)
@@ -34,5 +52,23 @@ OFFLOAD_FUNC(olGetDeviceInfoWithCodeLoc)
 OFFLOAD_FUNC(olGetDeviceInfoSizeWithCodeLoc)
 OFFLOAD_FUNC(olMemAllocWithCodeLoc)
 OFFLOAD_FUNC(olMemFreeWithCodeLoc)
+OFFLOAD_FUNC(olCreateQueueWithCodeLoc)
+OFFLOAD_FUNC(olRetainQueueWithCodeLoc)
+OFFLOAD_FUNC(olReleaseQueueWithCodeLoc)
+OFFLOAD_FUNC(olFinishQueueWithCodeLoc)
+OFFLOAD_FUNC(olRetainEventWithCodeLoc)
+OFFLOAD_FUNC(olReleaseEventWithCodeLoc)
+OFFLOAD_FUNC(olWaitEventWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueDataWriteWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueDataReadWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueDataCopyWithCodeLoc)
+OFFLOAD_FUNC(olEnqueueKernelLaunchWithCodeLoc)
+OFFLOAD_FUNC(olCreateProgramWithCodeLoc)
+OFFLOAD_FUNC(olRetainProgramWithCodeLoc)
+OFFLOAD_FUNC(olReleaseProgramWithCodeLoc)
+OFFLOAD_FUNC(olCreateKernelWithCodeLoc)
+OFFLOAD_FUNC(olRetainKernelWithCodeLoc)
+OFFLOAD_FUNC(olReleaseKernelWithCodeLoc)
+OFFLOAD_FUNC(olSetKernelArgValueWithCodeLoc)
 
 #undef OFFLOAD_FUNC
diff --git a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
index f0a96081fd2431..9d21d8fc970908 100644
--- a/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
+++ b/offload/liboffload/include/generated/OffloadImplFuncDecls.inc
@@ -43,3 +43,57 @@ ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
 
 ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
                                 void *Address);
+
+ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
+                                    ol_queue_handle_t *Queue);
+
+ol_impl_result_t olRetainQueue_impl(ol_queue_handle_t Queue);
+
+ol_impl_result_t olReleaseQueue_impl(ol_queue_handle_t Queue);
+
+ol_impl_result_t olFinishQueue_impl(ol_queue_handle_t Queue);
+
+ol_impl_result_t olRetainEvent_impl(ol_event_handle_t Event);
+
+ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event);
+
+ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event);
+
+ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *SrcPtr,
+                                         void *DstPtr, size_t Size,
+                                         ol_event_handle_t *EventOut);
+
+ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *SrcPtr,
+                                        void *DstPtr, size_t Size,
+                                        ol_event_handle_t *EventOut);
+
+ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue, void *SrcPtr,
+                                        void *DstPtr,
+                                        ol_device_handle_t DstDevice,
+                                        size_t Size,
+                                        ol_event_handle_t *EventOut);
+
+ol_impl_result_t olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue,
+                                            ol_kernel_handle_t Kernel,
+                                            const size_t *GlobalWorkSize,
+                                            ol_event_handle_t *EventOut);
+
+ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
+                                      size_t ProgDataSize,
+                                      ol_program_handle_t *Queue);
+
+ol_impl_result_t olRetainProgram_impl(ol_program_handle_t Program);
+
+ol_impl_result_t olReleaseProgram_impl(ol_program_handle_t Program);
+
+ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
+                                     const char *KernelName,
+                                     ol_kernel_handle_t *Kernel);
+
+ol_impl_result_t olRetainKernel_impl(ol_kernel_handle_t Kernel);
+
+ol_impl_result_t olReleaseKernel_impl(ol_kernel_handle_t Kernel);
+
+ol_impl_result_t olSetKernelArgValue_impl(ol_kernel_handle_t Kernel,
+                                          uint32_t Index, size_t Size,
+                                          void *ArgData);
diff --git a/offload/liboffload/include/generated/OffloadPrint.hpp b/offload/liboffload/include/generated/OffloadPrint.hpp
index cff754237568e6..698b422fc38d06 100644
--- a/offload/liboffload/include/generated/OffloadPrint.hpp
+++ b/offload/liboffload/include/generated/OffloadPrint.hpp
@@ -22,6 +22,10 @@ template <typename T> struct is_handle : std::false_type {};
 template <> struct is_handle<ol_platform_handle_t> : std::true_type {};
 template <> struct is_handle<ol_device_handle_t> : std::true_type {};
 template <> struct is_handle<ol_context_handle_t> : std::true_type {};
+template <> struct is_handle<ol_queue_handle_t> : std::true_type {};
+template <> struct is_handle<ol_event_handle_t> : std::true_type {};
+template <> struct is_handle<ol_program_handle_t> : std::true_type {};
+template <> struct is_handle<ol_kernel_handle_t> : std::true_type {};
 template <typename T> inline constexpr bool is_handle_v = is_handle<T>::value;
 
 inline std::ostream &operator<<(std::ostream &os, enum ol_errc_t value);
@@ -455,6 +459,212 @@ inline std::ostream &operator<<(std::ostream &os,
   return os;
 }
 
+inline std::ostream &operator<<(std::ostream &os,
+                                const struct ol_create_queue_params_t *params) {
+  os << ".Device = ";
+  printPtr(os, *params->pDevice);
+  os << ", ";
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  return os;
+}
+
+inline std::ostream &operator<<(std::ostream &os,
+                                const struct ol_retain_queue_params_t *params) {
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_release_queue_params_t *params) {
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  return os;
+}
+
+inline std::ostream &operator<<(std::ostream &os,
+                                const struct ol_finish_queue_params_t *params) {
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  return os;
+}
+
+inline std::ostream &operator<<(std::ostream &os,
+                                const struct ol_retain_event_params_t *params) {
+  os << ".Event = ";
+  printPtr(os, *params->pEvent);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_release_event_params_t *params) {
+  os << ".Event = ";
+  printPtr(os, *params->pEvent);
+  return os;
+}
+
+inline std::ostream &operator<<(std::ostream &os,
+                                const struct ol_wait_event_params_t *params) {
+  os << ".Event = ";
+  printPtr(os, *params->pEvent);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os,
+           const struct ol_enqueue_data_write_params_t *params) {
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  os << ", ";
+  os << ".SrcPtr = ";
+  printPtr(os, *params->pSrcPtr);
+  os << ", ";
+  os << ".DstPtr = ";
+  printPtr(os, *params->pDstPtr);
+  os << ", ";
+  os << ".Size = ";
+  os << *params->pSize;
+  os << ", ";
+  os << ".EventOut = ";
+  printPtr(os, *params->pEventOut);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os,
+           const struct ol_enqueue_data_read_params_t *params) {
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  os << ", ";
+  os << ".SrcPtr = ";
+  printPtr(os, *params->pSrcPtr);
+  os << ", ";
+  os << ".DstPtr = ";
+  printPtr(os, *params->pDstPtr);
+  os << ", ";
+  os << ".Size = ";
+  os << *params->pSize;
+  os << ", ";
+  os << ".EventOut = ";
+  printPtr(os, *params->pEventOut);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os,
+           const struct ol_enqueue_data_copy_params_t *params) {
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  os << ", ";
+  os << ".SrcPtr = ";
+  printPtr(os, *params->pSrcPtr);
+  os << ", ";
+  os << ".DstPtr = ";
+  printPtr(os, *params->pDstPtr);
+  os << ", ";
+  os << ".DstDevice = ";
+  printPtr(os, *params->pDstDevice);
+  os << ", ";
+  os << ".Size = ";
+  os << *params->pSize;
+  os << ", ";
+  os << ".EventOut = ";
+  printPtr(os, *params->pEventOut);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os,
+           const struct ol_enqueue_kernel_launch_params_t *params) {
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  os << ", ";
+  os << ".Kernel = ";
+  printPtr(os, *params->pKernel);
+  os << ", ";
+  os << ".GlobalWorkSize = ";
+  printPtr(os, *params->pGlobalWorkSize);
+  os << ", ";
+  os << ".EventOut = ";
+  printPtr(os, *params->pEventOut);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_create_program_params_t *params) {
+  os << ".Device = ";
+  printPtr(os, *params->pDevice);
+  os << ", ";
+  os << ".ProgData = ";
+  printPtr(os, *params->pProgData);
+  os << ", ";
+  os << ".ProgDataSize = ";
+  os << *params->pProgDataSize;
+  os << ", ";
+  os << ".Queue = ";
+  printPtr(os, *params->pQueue);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_retain_program_params_t *params) {
+  os << ".Program = ";
+  printPtr(os, *params->pProgram);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_release_program_params_t *params) {
+  os << ".Program = ";
+  printPtr(os, *params->pProgram);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_create_kernel_params_t *params) {
+  os << ".Program = ";
+  printPtr(os, *params->pProgram);
+  os << ", ";
+  os << ".KernelName = ";
+  printPtr(os, *params->pKernelName);
+  os << ", ";
+  os << ".Kernel = ";
+  printPtr(os, *params->pKernel);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_retain_kernel_params_t *params) {
+  os << ".Kernel = ";
+  printPtr(os, *params->pKernel);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os, const struct ol_release_kernel_params_t *params) {
+  os << ".Kernel = ";
+  printPtr(os, *params->pKernel);
+  return os;
+}
+
+inline std::ostream &
+operator<<(std::ostream &os,
+           const struct ol_set_kernel_arg_value_params_t *params) {
+  os << ".Kernel = ";
+  printPtr(os, *params->pKernel);
+  os << ", ";
+  os << ".Index = ";
+  os << *params->pIndex;
+  os << ", ";
+  os << ".Size = ";
+  os << *params->pSize;
+  os << ", ";
+  os << ".ArgData = ";
+  printPtr(os, *params->pArgData);
+  return os;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // @brief Print pointer value
 template <typename T>
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 3e609ed03917f4..d1c72ecced875c 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -34,6 +34,117 @@ struct ol_platform_handle_t_ {
   std::vector<ol_device_handle_t_> Devices;
 };
 
+struct ol_queue_handle_t_ { // State behind an ol_queue_handle_t
+  __tgt_async_info *AsyncInfo; // plugin async info passed to calls on this queue
+  ol_device_handle_t Device; // device the queue was created for
+  std::atomic_uint32_t RefCount; // 1 on creation; changed by retain/release
+};
+
+struct ol_event_handle_t_ { // State behind an ol_event_handle_t
+  void *EventInfo; // plugin event object filled in by createEvent
+  ol_queue_handle_t Queue; // queue the event was recorded on
+  ol_device_handle_t Device; // device olWaitEvent_impl synchronizes through
+  std::atomic_uint32_t RefCount; // changed by olRetainEvent_impl/olReleaseEvent_impl
+};
+
+struct ol_program_handle_t_ { // State behind an ol_program_handle_t
+  llvm::omp::target::plugin::DeviceImageTy *Image; // image returned by loadBinary
+  std::atomic_uint32_t RefCount; // 1 on creation; handle is deleted at 0
+};
+
+/// Packs kernel argument values into one contiguous byte buffer and keeps a
+/// pointer to each argument in index order, followed by a pointer to the
+/// implicit offset triple.
+///
+/// NOTE:
+/// This implementation is derived from the CUDA adapter's argument
+/// implementation. Even though it was designed for CUDA, the design of
+/// libomptarget means it should work for other plugins as they will expect
+/// the same argument layout.
+///
+/// Indices holds addresses that point into this object (Storage and
+/// ImplicitOffsetArgs), so a copied instance would still refer to the object
+/// it was copied from.
+struct OffloadArguments {
+  static constexpr size_t MaxParamBytes = 4000u;
+  using args_t = std::array<char, MaxParamBytes>;
+  using args_size_t = std::vector<size_t>;
+  using args_index_t = std::vector<void *>;
+  /// Raw bytes of all arguments, packed back to back in index order.
+  args_t Storage;
+  /// Size in bytes of each argument, by index.
+  args_size_t ParamSizes;
+  /// Pointer to each argument inside Storage; the last entry always points at
+  /// ImplicitOffsetArgs.
+  args_index_t Indices;
+  /// Local-memory bytes (including padding) attributed to each argument.
+  args_size_t OffsetPerIndex;
+
+  std::uint32_t ImplicitOffsetArgs[3] = {0, 0, 0};
+
+  OffloadArguments() {
+    // Place the implicit offset index at the end of the indices collection
+    Indices.emplace_back(&ImplicitOffsetArgs);
+  }
+
+  /// Add an argument to the kernel.
+  /// If the argument existed before, it is replaced.
+  /// Otherwise, it is added.
+  /// Gaps are filled with empty arguments.
+  /// Implicit offset argument is kept at the back of the indices collection.
+  ///
+  /// \param Index     position of the argument in the kernel signature.
+  /// \param Size      number of bytes to copy from \p Arg.
+  /// \param Arg       source of the argument value.
+  /// \param LocalSize local-memory bytes to attribute to this argument.
+  void addArg(size_t Index, size_t Size, const void *Arg,
+              size_t LocalSize = 0) {
+    if (Index + 2 > Indices.size()) {
+      // Copy the implicit offset pointer first so resize is not handed a
+      // reference into the vector it is growing.
+      void *const ImplicitOffsetPtr = Indices.back();
+      // Move implicit offset argument index with the end
+      Indices.resize(Index + 2, ImplicitOffsetPtr);
+      // Ensure enough space for the new argument
+      ParamSizes.resize(Index + 1);
+      OffsetPerIndex.resize(Index + 1);
+    }
+    ParamSizes[Index] = Size;
+    // Calculate the insertion point in the buffer. The accumulator is seeded
+    // with size_t so the sum is not carried out in int.
+    const size_t InsertPos = std::accumulate(
+        std::begin(ParamSizes), std::begin(ParamSizes) + Index, size_t{0});
+    assert(InsertPos <= MaxParamBytes && Size <= MaxParamBytes - InsertPos &&
+           "Kernel arguments exceed MaxParamBytes");
+    // Update the stored value for the argument
+    char *const Dst = Storage.data() + InsertPos;
+    if (Size != 0)
+      std::memcpy(Dst, Arg, Size);
+    Indices[Index] = Dst;
+    OffsetPerIndex[Index] = LocalSize;
+  }
+
+  /// Add a local-memory argument of \p Size bytes at \p Index. The stored
+  /// argument value is the aligned offset of the allocation within the local
+  /// memory already claimed by earlier arguments.
+  void addLocalArg(size_t Index, size_t Size) {
+    const size_t LocalOffset = this->getLocalSize();
+
+    // maximum required alignment is the size of the largest vector type
+    const size_t MaxAlignment = sizeof(double) * 16;
+
+    // for arguments smaller than the maximum alignment simply align to the
+    // size of the argument
+    const size_t Alignment = std::min(MaxAlignment, Size);
+
+    // Align the argument. A zero-sized argument needs no padding, and
+    // skipping it avoids a modulo by zero.
+    size_t AlignedLocalOffset = LocalOffset;
+    if (Alignment != 0) {
+      const size_t Pad = LocalOffset % Alignment;
+      if (Pad != 0)
+        AlignedLocalOffset += Alignment - Pad;
+    }
+
+    addArg(Index, sizeof(size_t), &AlignedLocalOffset,
+           Size + (AlignedLocalOffset - LocalOffset));
+  }
+
+  /// Overwrite the implicit offset triple; \p Size must be three uint32_t.
+  void setImplicitOffset(size_t Size, std::uint32_t *ImplicitOffset) {
+    assert(Size == sizeof(std::uint32_t) * 3);
+    std::memcpy(ImplicitOffsetArgs, ImplicitOffset, Size);
+  }
+
+  /// Reset the local-memory size recorded for every argument to zero.
+  void clearLocalSize() {
+    std::fill(std::begin(OffsetPerIndex), std::end(OffsetPerIndex), 0);
+  }
+
+  const args_index_t &getIndices() const noexcept { return Indices; }
+
+  /// Total local-memory bytes recorded across all arguments.
+  uint32_t getLocalSize() const {
+    return static_cast<uint32_t>(std::accumulate(
+        std::begin(OffsetPerIndex), std::end(OffsetPerIndex), size_t{0}));
+  }
+
+  const char *getStorage() const noexcept { return Storage.data(); }
+};
+
+struct ol_kernel_handle_t_ { // State behind an ol_kernel_handle_t
+  ol_program_handle_t Program; // program the kernel was created from
+  std::atomic_uint32_t RefCount; // 1 on creation; handle is deleted at 0
+  GenericKernelTy *KernelImpl; // plugin kernel object used for launches
+  OffloadArguments Args; // argument values set via olSetKernelArgValue_impl
+};
+
 using PlatformVecT = SmallVector<ol_platform_handle_t_, 4>;
 PlatformVecT &Platforms() {
   static PlatformVecT Platforms;
@@ -280,3 +391,252 @@ ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
   }
   return OL_SUCCESS;
 }
+
+/// Create a queue for \p Device and return its handle through \p Queue.
+///
+/// The new queue starts with a reference count of 1. Returns
+/// OL_ERRC_OUT_OF_RESOURCES if the plugin fails to initialise the async info;
+/// \p Queue is left untouched in that case.
+ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
+                                    ol_queue_handle_t *Queue) {
+  auto CreatedQueue = std::make_unique<ol_queue_handle_t_>();
+  if (auto Err = Device->Device.initAsyncInfo(&(CreatedQueue->AsyncInfo))) {
+    // A failed llvm::Error has to be consumed before it is destroyed.
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+  CreatedQueue->Device = Device;
+  CreatedQueue->RefCount = 1;
+  *Queue = CreatedQueue.release();
+  return OL_SUCCESS;
+}
+
+/// Add one reference to \p Queue.
+ol_impl_result_t olRetainQueue_impl(ol_queue_handle_t Queue) {
+  Queue->RefCount.fetch_add(1);
+  return OL_SUCCESS;
+}
+
+/// Drop one reference to \p Queue and destroy the handle once none remain.
+///
+/// NOTE(review): only the handle object is freed here; confirm how the plugin
+/// expects Queue->AsyncInfo to be released.
+ol_impl_result_t olReleaseQueue_impl(ol_queue_handle_t Queue) {
+  if (--Queue->RefCount == 0) {
+    delete Queue;
+  }
+  return OL_SUCCESS;
+}
+
+/// Synchronize \p Queue with the device and re-initialise its async info so
+/// the queue can be used again.
+///
+/// Returns OL_ERRC_OUT_OF_RESOURCES if either plugin call fails.
+ol_impl_result_t olFinishQueue_impl(ol_queue_handle_t Queue) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  // Host plugin doesn't have a queue set so it's not safe to call synchronize
+  // on it, but we have nothing to synchronize in that situation anyway.
+  if (Queue->AsyncInfo->Queue) {
+    if (auto Err = DeviceImpl.synchronize(Queue->AsyncInfo)) {
+      // A failed llvm::Error has to be consumed before it is destroyed.
+      llvm::consumeError(std::move(Err));
+      return OL_ERRC_OUT_OF_RESOURCES;
+    }
+  }
+
+  // Recreate the stream resource so the queue can be reused
+  // TODO: Would be easier for the synchronization to (optionally) not release
+  // it to begin with.
+  if (auto Err = DeviceImpl.initAsyncInfo(&Queue->AsyncInfo)) {
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  return OL_SUCCESS;
+}
+
+/// Block until \p Event has completed.
+///
+/// Returns OL_ERRC_OUT_OF_RESOURCES if the plugin fails to synchronize on the
+/// event.
+ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event) {
+  // Use the device of the queue the event was recorded on when the event
+  // carries no device of its own, rather than dereferencing a null handle.
+  ol_device_handle_t EventDevice =
+      Event->Device ? Event->Device : Event->Queue->Device;
+  if (auto Res = EventDevice->Device.syncEvent(Event->EventInfo)) {
+    // A failed llvm::Error has to be consumed before it is destroyed.
+    llvm::consumeError(std::move(Res));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+  return OL_SUCCESS;
+}
+
+/// Add one reference to \p Event.
+ol_impl_result_t olRetainEvent_impl(ol_event_handle_t Event) {
+  Event->RefCount.fetch_add(1);
+  return OL_SUCCESS;
+}
+
+/// Drop one reference to \p Event and destroy the handle once none remain.
+///
+/// NOTE(review): only the handle object is freed here; confirm how the plugin
+/// expects Event->EventInfo to be destroyed.
+ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event) {
+  if (--Event->RefCount == 0) {
+    delete Event;
+  }
+  return OL_SUCCESS;
+}
+
+/// Create an event and record it on \p Queue.
+///
+/// The returned event starts with a reference count of 1. Returns nullptr if
+/// the plugin fails to create or record the event.
+ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  auto EventImpl = std::make_unique<ol_event_handle_t_>();
+  EventImpl->Queue = Queue;
+  // olWaitEvent_impl synchronizes through the event's Device member, so it
+  // has to be populated here.
+  EventImpl->Device = Queue->Device;
+  EventImpl->RefCount = 1;
+
+  if (auto Res = DeviceImpl.createEvent(&EventImpl->EventInfo)) {
+    // A failed llvm::Error has to be consumed before it is destroyed.
+    llvm::consumeError(std::move(Res));
+    return nullptr;
+  }
+  if (auto Res =
+          DeviceImpl.recordEvent(EventImpl->EventInfo, Queue->AsyncInfo)) {
+    // NOTE(review): the plugin event created above is not destroyed on this
+    // path; confirm the right plugin call to release it.
+    llvm::consumeError(std::move(Res));
+    return nullptr;
+  }
+
+  return EventImpl.release();
+}
+
+/// Enqueue a copy of \p Size bytes from \p SrcPtr to \p DstPtr on \p Queue
+/// using the plugin's dataSubmit.
+///
+/// If \p EventOut is non-null it receives an event recorded after the copy.
+/// Returns OL_ERRC_OUT_OF_RESOURCES if the submit fails or if the requested
+/// event cannot be created.
+ol_impl_result_t olEnqueueDataWrite_impl(ol_queue_handle_t Queue, void *SrcPtr,
+                                         void *DstPtr, size_t Size,
+                                         ol_event_handle_t *EventOut) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  if (auto Res =
+          DeviceImpl.dataSubmit(DstPtr, SrcPtr, Size, Queue->AsyncInfo)) {
+    // A failed llvm::Error has to be consumed before it is destroyed.
+    llvm::consumeError(std::move(Res));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  if (EventOut) {
+    *EventOut = makeEvent(Queue);
+    // Do not report success while handing back a null event.
+    if (!*EventOut)
+      return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  return OL_SUCCESS;
+}
+
+/// Enqueue a copy of \p Size bytes from \p SrcPtr to \p DstPtr on \p Queue
+/// using the plugin's dataRetrieve.
+///
+/// If \p EventOut is non-null it receives an event recorded after the copy.
+/// Returns OL_ERRC_OUT_OF_RESOURCES if the retrieve fails or if the requested
+/// event cannot be created.
+ol_impl_result_t olEnqueueDataRead_impl(ol_queue_handle_t Queue, void *SrcPtr,
+                                        void *DstPtr, size_t Size,
+                                        ol_event_handle_t *EventOut) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  if (auto Res =
+          DeviceImpl.dataRetrieve(DstPtr, SrcPtr, Size, Queue->AsyncInfo)) {
+    // A failed llvm::Error has to be consumed before it is destroyed.
+    llvm::consumeError(std::move(Res));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  if (EventOut) {
+    *EventOut = makeEvent(Queue);
+    // Do not report success while handing back a null event.
+    if (!*EventOut)
+      return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  return OL_SUCCESS;
+}
+
+/// Enqueue a copy of \p Size bytes from \p SrcPtr on the queue's device to
+/// \p DstPtr on \p DstDevice using the plugin's dataExchange.
+///
+/// If \p EventOut is non-null it receives an event recorded after the copy.
+/// Returns OL_ERRC_OUT_OF_RESOURCES if the exchange fails or if the requested
+/// event cannot be created.
+ol_impl_result_t olEnqueueDataCopy_impl(ol_queue_handle_t Queue, void *SrcPtr,
+                                        void *DstPtr,
+                                        ol_device_handle_t DstDevice,
+                                        size_t Size,
+                                        ol_event_handle_t *EventOut) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  if (auto Res = DeviceImpl.dataExchange(SrcPtr, DstDevice->Device, DstPtr,
+                                         Size, Queue->AsyncInfo)) {
+    // A failed llvm::Error has to be consumed before it is destroyed.
+    llvm::consumeError(std::move(Res));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  if (EventOut) {
+    *EventOut = makeEvent(Queue);
+    // Do not report success while handing back a null event.
+    if (!*EventOut)
+      return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  return OL_SUCCESS;
+}
+
+/// Load the binary at \p ProgData (\p ProgDataSize bytes) onto \p Device and
+/// return a program handle through \p Program.
+///
+/// The new program starts with a reference count of 1. Returns
+/// OL_ERRC_INVALID_VALUE if the plugin rejects the binary; \p Program is left
+/// untouched in that case.
+ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device, void *ProgData,
+                                      size_t ProgDataSize,
+                                      ol_program_handle_t *Program) {
+  // NOTE(review): the end pointer is start + size - 1; confirm whether the
+  // plugin treats ImageEnd as inclusive or as one past the last byte.
+  // NOTE(review): DeviceImage lives on this stack frame; confirm loadBinary
+  // does not keep the pointer beyond this call.
+  char *const ImageStart = static_cast<char *>(ProgData);
+  __tgt_device_image DeviceImage{ProgData, ImageStart + ProgDataSize - 1,
+                                 nullptr, nullptr};
+
+  auto Res = Device->Device.loadBinary(Device->Device.Plugin, &DeviceImage);
+  if (!Res) {
+    // A failed llvm::Expected has to have its error consumed before it is
+    // destroyed.
+    llvm::consumeError(Res.takeError());
+    return OL_ERRC_INVALID_VALUE;
+  }
+
+  ol_program_handle_t Prog = new ol_program_handle_t_();
+  Prog->Image = *Res;
+  Prog->RefCount = 1;
+  *Program = Prog;
+
+  return OL_SUCCESS;
+}
+
+/// Add one reference to \p Program.
+ol_impl_result_t olRetainProgram_impl(ol_program_handle_t Program) {
+  Program->RefCount.fetch_add(1);
+  return OL_SUCCESS;
+}
+
+/// Drop one reference to \p Program and delete the handle when the count
+/// reaches zero.
+ol_impl_result_t olReleaseProgram_impl(ol_program_handle_t Program) {
+  // fetch_sub returns the value held before the decrement.
+  const bool WasLastReference = Program->RefCount.fetch_sub(1) == 1;
+  if (WasLastReference)
+    delete Program;
+  return OL_SUCCESS;
+}
+
+/// Construct and initialise the kernel named \p KernelName from \p Program
+/// and return its handle through \p Kernel.
+///
+/// The new kernel starts with a reference count of 1. Returns
+/// OL_ERRC_OUT_OF_RESOURCES if the plugin fails to construct or initialise the
+/// kernel; \p Kernel is left untouched in that case.
+ol_impl_result_t olCreateKernel_impl(ol_program_handle_t Program,
+                                     const char *KernelName,
+                                     ol_kernel_handle_t *Kernel) {
+
+  auto &Device = Program->Image->getDevice();
+  auto KernelImpl = Device.constructKernel(KernelName);
+  if (!KernelImpl) {
+    // A failed llvm::Expected has to have its error consumed before it is
+    // destroyed.
+    llvm::consumeError(KernelImpl.takeError());
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  if (auto Err = KernelImpl->init(Device, *Program->Image)) {
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  ol_kernel_handle_t CreatedKernel = new ol_kernel_handle_t_();
+  CreatedKernel->Program = Program;
+  CreatedKernel->RefCount = 1;
+  CreatedKernel->KernelImpl = &*KernelImpl;
+  *Kernel = CreatedKernel;
+
+  return OL_SUCCESS;
+}
+
+/// Add one reference to \p Kernel.
+ol_impl_result_t olRetainKernel_impl(ol_kernel_handle_t Kernel) {
+  Kernel->RefCount.fetch_add(1);
+  return OL_SUCCESS;
+}
+
+/// Drop one reference to \p Kernel and delete the handle when the count
+/// reaches zero.
+ol_impl_result_t olReleaseKernel_impl(ol_kernel_handle_t Kernel) {
+  // fetch_sub returns the value held before the decrement.
+  const bool WasLastReference = Kernel->RefCount.fetch_sub(1) == 1;
+  if (WasLastReference)
+    delete Kernel;
+  return OL_SUCCESS;
+}
+
+/// Store \p Size bytes read from \p ArgData as argument \p Index of
+/// \p Kernel, replacing any value previously set at that index.
+ol_impl_result_t olSetKernelArgValue_impl(ol_kernel_handle_t Kernel,
+                                          uint32_t Index, size_t Size,
+                                          void *ArgData) {
+  OffloadArguments &KernelArgs = Kernel->Args;
+  KernelArgs.addArg(Index, Size, ArgData);
+  return OL_SUCCESS;
+}
+
+/// Launch \p Kernel on \p Queue with GlobalWorkSize[0] teams of one thread.
+///
+/// If \p EventOut is non-null it receives an event recorded after the launch.
+/// Returns OL_ERRC_OUT_OF_RESOURCES if the launch fails or if the requested
+/// event cannot be created.
+///
+/// NOTE(review): the packed argument bytes are handed to the plugin as the
+/// argument pointer array; confirm this matches what launch expects for
+/// arguments that are not pointer-sized.
+ol_impl_result_t olEnqueueKernelLaunch_impl(ol_queue_handle_t Queue,
+                                            ol_kernel_handle_t Kernel,
+                                            const size_t *GlobalWorkSize,
+                                            ol_event_handle_t *EventOut) {
+  auto &DeviceImpl = Queue->Device->Device;
+
+  AsyncInfoWrapperTy AsyncInfoWrapper(DeviceImpl, Queue->AsyncInfo);
+
+  KernelArgsTy LaunchArgs{};
+  // The last entry of the index list is the implicit offset, not an argument.
+  LaunchArgs.NumArgs = Kernel->Args.getIndices().size() - 1; // TODO
+  LaunchArgs.NumTeams[0] = GlobalWorkSize[0];
+  LaunchArgs.NumTeams[1] = 1;
+  LaunchArgs.NumTeams[2] = 1;
+  LaunchArgs.ThreadLimit[0] = 1;
+  LaunchArgs.ThreadLimit[1] = 1;
+  LaunchArgs.ThreadLimit[2] = 1;
+
+  void **ArgPtrs = (void **)Kernel->Args.getStorage();
+  LaunchArgs.ArgPtrs = ArgPtrs;
+
+  // TODO: Verify this
+  std::vector<ptrdiff_t> ArgOffsets(LaunchArgs.NumArgs, ptrdiff_t{0});
+
+  auto Err = Kernel->KernelImpl->launch(DeviceImpl, ArgPtrs, ArgOffsets.data(),
+                                        LaunchArgs, AsyncInfoWrapper);
+
+  AsyncInfoWrapper.finalize(Err);
+  if (Err) {
+    // A failed llvm::Error has to be consumed before it is destroyed.
+    llvm::consumeError(std::move(Err));
+    return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  if (EventOut) {
+    *EventOut = makeEvent(Queue);
+    // Do not report success while handing back a null event.
+    if (!*EventOut)
+      return OL_ERRC_OUT_OF_RESOURCES;
+  }
+
+  return OL_SUCCESS;
+}
diff --git a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h
index d2914e7cd0eb4f..d65fceb8508d2e 100644
--- a/offload/plugins-nextgen/common/include/GlobalHandler.h
+++ b/offload/plugins-nextgen/common/include/GlobalHandler.h
@@ -131,8 +131,9 @@ class GenericGlobalHandlerTy {
 
   /// Get the address and size of a global in the image. Address and size are
   /// return in \p ImageGlobal, the global name is passed in \p ImageGlobal.
-  Error getGlobalMetadataFromImage(GenericDeviceTy &Device,
-                                   DeviceImageTy &Image, GlobalTy &ImageGlobal);
+  virtual Error getGlobalMetadataFromImage(GenericDeviceTy &Device,
+                                           DeviceImageTy &Image,
+                                           GlobalTy &ImageGlobal);
 
   /// Read the memory associated with a global from the image and store it on
   /// the host. The name, size, and destination are defined by \p HostGlobal.
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 894d1c2214b972..d7a69091ada747 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1327,6 +1327,34 @@ class CUDAGlobalHandlerTy final : public GenericGlobalHandlerTy {
     DeviceGlobal.setPtr(reinterpret_cast<void *>(CUPtr));
     return Plugin::success();
   }
+
+  /// Look up the global named by \p ImageGlobal in \p Image and store its
+  /// address and size back into \p ImageGlobal.
+  ///
+  /// ELF images are handled by the generic implementation; any other image is
+  /// queried through cuModuleGetGlobal.
+  Error getGlobalMetadataFromImage(GenericDeviceTy &Device,
+                                   DeviceImageTy &Image,
+                                   GlobalTy &ImageGlobal) override {
+    const bool IsELF = utils::elf::isELF(Image.getMemoryBuffer().getBuffer());
+    if (IsELF)
+      return GenericGlobalHandlerTy::getGlobalMetadataFromImage(Device, Image,
+                                                                ImageGlobal);
+
+    auto &ModuleImage = static_cast<CUDADeviceImageTy &>(Image);
+    const char *Name = ImageGlobal.getName().data();
+
+    CUdeviceptr DevicePtr;
+    size_t ByteSize;
+    CUresult Status = cuModuleGetGlobal(&DevicePtr, &ByteSize,
+                                        ModuleImage.getModule(), Name);
+    if (auto Err = Plugin::check(
+            Status, "Error in cuModuleGetGlobal for '%s': %s", Name))
+      return Err;
+
+    // Publish the symbol's address and size to the caller.
+    ImageGlobal.setPtr(reinterpret_cast<void *>(DevicePtr));
+    ImageGlobal.setSize(ByteSize);
+    return Plugin::success();
+  }
 };
 
 /// Class implementing the CUDA-specific functionalities of the plugin.
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 915c41e88c5828..1ba9a49f4f9aff 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -287,9 +287,9 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
     return Plugin::success();
   }
 
-  /// This plugin does not support interoperability
+  /// This plugin does not support interoperability, do nothing
   Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override {
-    return Plugin::error("initAsyncInfoImpl not supported");
+    return Plugin::success();
   }
 
   /// This plugin does not support interoperability
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index 033ee2b6ec746a..e0d790684898d0 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -10,7 +10,9 @@ add_libompt_unittest("offload.unittests"
     ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDevice.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceCount.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfo.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfoSize.cpp)
+    ${CMAKE_CURRENT_SOURCE_DIR}/device/olGetDeviceInfoSize.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/queue/olCreateQueue.cpp
+    )
 add_dependencies("offload.unittests" ${PLUGINS_TEST_COMMON})
 target_link_libraries("offload.unittests" PRIVATE ${PLUGINS_TEST_COMMON})
 target_include_directories("offload.unittests" PRIVATE ${PLUGINS_TEST_INCLUDE})
diff --git a/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp b/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp
new file mode 100644
index 00000000000000..f542dac4bb2d89
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olCreateQueue.cpp
@@ -0,0 +1,19 @@
+//===------- Offload API tests - olCreateQueue ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olCreateQueueTest = offloadDeviceTest;
+
+// Creating a queue on a valid device succeeds and yields a non-null handle.
+TEST_F(olCreateQueueTest, Success) {
+  ol_queue_handle_t Queue = nullptr;
+  ASSERT_SUCCESS(olCreateQueue(Device, &Queue));
+  ASSERT_NE(Queue, nullptr);
+  // Drop the reference returned by olCreateQueue so the test does not leak it.
+  ASSERT_SUCCESS(olReleaseQueue(Queue));
+}



More information about the llvm-commits mailing list